A (non-exhaustive) list of publications produced by members of the laboratory
2024
Singh, Jaspreet; Soprano, Michael; Roitero, Kevin; Ceolin, Davide
Crowdsourcing Statement Classification to Enhance Information Quality Prediction Proceedings Article
In: Preuss, Mike; Leszkiewicz, Agata; Boucher, Jean-Christophe; Fridman, Ofer; Stampe, Lucas (Ed.): Proceedings of the 6th Multidisciplinary International Symposium on Disinformation in Open Online Media (MISDOOM 2024), pp. 70–85, Springer Nature Switzerland, Münster, Germany, 2024, ISBN: 978-3-031-71210-4.
@inproceedings{10.1007/978-3-031-71210-4_5,
title = {Crowdsourcing Statement Classification to Enhance Information Quality Prediction},
author = {Jaspreet Singh and Michael Soprano and Kevin Roitero and Davide Ceolin},
editor = {Mike Preuss and Agata Leszkiewicz and Jean-Christophe Boucher and Ofer Fridman and Lucas Stampe},
url = {https://link.springer.com/chapter/10.1007/978-3-031-71210-4_5},
doi = {10.1007/978-3-031-71210-4_5},
isbn = {978-3-031-71210-4},
year = {2024},
date = {2024-08-31},
urldate = {2024-01-01},
booktitle = {Proceedings of the 6th Multidisciplinary International Symposium on Disinformation in Open Online Media (MISDOOM 2024)},
pages = {70–85},
publisher = {Springer Nature Switzerland},
address = {Münster, Germany},
series = {Lecture Notes in Computer Science},
abstract = {This paper explores the use of crowdsourcing to classify statement types in film reviews to assess their information quality. Employing the Argument Type Identification Procedure which uses the Periodic Table of Arguments to categorize arguments, the study aims to connect statement types to the overall argument strength and information reliability. Focusing on non-expert annotators in a crowdsourcing environment, the research assesses their reliability based on various factors including language proficiency and annotation experience. Results indicate the importance of careful annotator selection and training to achieve high inter-annotator agreement and highlight challenges in crowdsourcing statement classification for information quality assessment.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
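Since the contribution above centers on inter-annotator agreement, a minimal sketch of how pairwise agreement can be measured may be useful; the statement-type labels and annotator answers below are invented placeholders, and Cohen's kappa (computed here with scikit-learn) is only one of several agreement measures, not necessarily the one used in the paper.

# Hypothetical agreement check between two crowd annotators; the labels
# are placeholders, not the paper's data or categories.
from sklearn.metrics import cohen_kappa_score

annotator_a = ["factual", "opinion", "value", "factual", "policy"]
annotator_b = ["factual", "opinion", "factual", "factual", "policy"]

kappa = cohen_kappa_score(annotator_a, annotator_b)
print(f"Cohen's kappa: {kappa:.2f}")  # 1.0 = perfect agreement, 0.0 = chance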
Soprano, Michael; Roitero, Kevin; Gadiraju, Ujwal; Maddalena, Eddy; Demartini, Gianluca
Longitudinal Loyalty: Understanding The Barriers To Running Longitudinal Studies On Crowdsourcing Platforms Journal Article
In: ACM Transactions on Social Computing, vol. 1, no. 1, pp. 50, 2024, ISSN: 2469-7818.
@article{10.1145/3674884,
title = {Longitudinal Loyalty: Understanding The Barriers To Running Longitudinal Studies On Crowdsourcing Platforms},
author = {Michael Soprano and Kevin Roitero and Ujwal Gadiraju and Eddy Maddalena and Gianluca Demartini},
url = {https://doi.org/10.1145/3674884},
doi = {10.1145/3674884},
issn = {2469-7818},
year = {2024},
date = {2024-08-11},
urldate = {2024-08-11},
journal = {ACM Transactions on Social Computing},
volume = {1},
number = {1},
issue = {1},
pages = {50},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {Crowdsourcing tasks have been widely used to collect a large number of human labels at scale. While some of these tasks are deployed by requesters and performed only once by crowd workers, others require the same worker to perform the same task or a variant of it more than once, thus participating in a so-called longitudinal study. Despite the prevalence of longitudinal studies in crowdsourcing, there is a limited understanding of factors that influence worker participation in them across different crowdsourcing marketplaces. We present results from a large-scale survey of 300 workers on 3 different micro-task crowdsourcing platforms: Amazon Mechanical Turk, Prolific and Toloka. The aim is to understand how longitudinal studies are performed using crowdsourcing. We collect answers about 547 experiences and we analyze them both quantitatively and qualitatively. We synthesize 17 take-home messages about longitudinal studies together with 8 recommendations for task requesters and 5 best practices for crowdsourcing platforms to adequately conduct and support such kinds of studies. We release the survey and the data at: https://osf.io/h4du9/.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Zeng, Xia; Barbera, David La; Roitero, Kevin; Zubiaga, Arkaitz; Mizzaro, Stefano
Combining Large Language Models and Crowdsourcing for Hybrid Human-AI Misinformation Detection Proceedings Article
In: Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2332–2336, Association for Computing Machinery, Washington DC, USA, 2024, ISBN: 9798400704314.
@inproceedings{10.1145/3626772.3657965,
title = {Combining Large Language Models and Crowdsourcing for Hybrid Human-AI Misinformation Detection},
author = {Xia Zeng and David La Barbera and Kevin Roitero and Arkaitz Zubiaga and Stefano Mizzaro},
url = {https://doi.org/10.1145/3626772.3657965},
doi = {10.1145/3626772.3657965},
isbn = {9798400704314},
year = {2024},
date = {2024-07-11},
urldate = {2024-07-11},
booktitle = {Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval},
pages = {2332–2336},
publisher = {Association for Computing Machinery},
address = {Washington DC, USA},
series = {SIGIR '24},
abstract = {Research on misinformation detection has primarily focused either on furthering Artificial Intelligence (AI) for automated detection or on studying humans' ability to deliver an effective crowdsourced solution. Each of these directions, however, shows different benefits. This motivates our work to study hybrid human-AI approaches jointly leveraging the potential of large language models and crowdsourcing, which is understudied to date. We propose the novel combination strategies Model First, Worker First, and Meta Vote, which we evaluate along with baseline methods such as mean, median, hard- and soft-voting. Using 120 statements from the PolitiFact dataset, and a combination of state-of-the-art AI models and crowdsourced assessments, we evaluate the effectiveness of these combination strategies. Results suggest that the effectiveness varies with scale granularity, and that combining AI and human judgments enhances the effectiveness and robustness of truthfulness assessments.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
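The baseline combination strategies named in the abstract above (mean, median, hard- and soft-voting) can be sketched in a few lines of Python; the truthfulness scores below are invented, and this is a hedged illustration rather than the authors' code.

# Invented scores on a [0, 1] truthfulness scale; not the paper's data.
import statistics

model_scores = [0.8, 0.6, 0.9]  # scores from AI models
crowd_scores = [0.7, 0.5, 0.6]  # aggregated crowd judgments
all_scores = model_scores + crowd_scores

mean_combined = statistics.mean(all_scores)      # mean baseline
median_combined = statistics.median(all_scores)  # median baseline

# Hard voting: binarize each score first, then take the majority class.
votes = [score >= 0.5 for score in all_scores]
hard_vote = votes.count(True) > len(votes) / 2

# Soft voting: average the raw scores, then apply a single threshold.
soft_vote = mean_combined >= 0.5

print(mean_combined, median_combined, hard_vote, soft_vote)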
Barbera, David La; Maddalena, Eddy; Soprano, Michael; Roitero, Kevin; Demartini, Gianluca; Ceolin, Davide; Spina, Damiano; Mizzaro, Stefano
Crowdsourced Fact-checking: Does It Actually Work? Journal Article
In: Information Processing & Management, vol. 61, no. 5, pp. 103792, 2024, ISSN: 0306-4573.
@article{BARBERA2024103792b,
title = {Crowdsourced Fact-checking: Does It Actually Work?},
author = {David La Barbera and Eddy Maddalena and Michael Soprano and Kevin Roitero and Gianluca Demartini and Davide Ceolin and Damiano Spina and Stefano Mizzaro},
url = {https://www.sciencedirect.com/science/article/pii/S0306457324001523},
doi = {10.1016/j.ipm.2024.103792},
issn = {0306-4573},
year = {2024},
date = {2024-05-31},
urldate = {2024-05-31},
journal = {Information Processing & Management},
volume = {61},
number = {5},
pages = {103792},
abstract = {There is an important ongoing effort aimed at tackling misinformation and performing reliable fact-checking by employing human assessors at scale, with a crowdsourcing-based approach. Previous studies on the feasibility of employing crowdsourcing for the task of misinformation detection have provided inconsistent results: some of them seem to confirm the effectiveness of crowdsourcing for assessing the truthfulness of statements and claims, whereas others fail to reach an effectiveness level higher than automatic machine learning approaches, which are still unsatisfactory. In this paper, we aim at addressing such inconsistency and understanding whether truthfulness assessment can indeed be crowdsourced effectively. To do so, we build on top of previous studies; we select some of those reporting low effectiveness levels, we highlight their potential limitations, and we then reproduce their work, attempting to improve their setup to address those limitations. We employ various approaches, data quality levels, and agreement measures to assess the reliability of crowd workers when assessing the truthfulness of (mis)information. Furthermore, we explore different worker features and compare the results obtained with different crowds. According to our findings, crowdsourcing can be used as an effective methodology to tackle misinformation at scale. When compared to previous studies, our results indicate that a significantly higher agreement between crowd workers and experts can be obtained by using a different, higher-quality crowdsourcing platform and by improving the design of the crowdsourcing task. Also, we find differences concerning task and worker features and how workers provide truthfulness assessments.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Baroni, Giulia Lucrezia; Rasotto, Laura; Roitero, Kevin; Tulisso, Angelica; Loreto, Carla Di; Mea, Vincenzo Della
Optimizing Vision Transformers for Histopathology: Pretraining and Normalization in Breast Cancer Classification Journal Article
In: Journal of Imaging, vol. 10, no. 5, 2024, ISSN: 2313-433X.
@article{jimaging10050108,
title = {Optimizing Vision Transformers for Histopathology: Pretraining and Normalization in Breast Cancer Classification},
author = {Giulia Lucrezia Baroni and Laura Rasotto and Kevin Roitero and Angelica Tulisso and Carla Di Loreto and Vincenzo Della Mea},
publisher = {MDPI},
url = {https://www.mdpi.com/2313-433X/10/5/108},
doi = {10.3390/jimaging10050108},
issn = {2313-433X},
year = {2024},
date = {2024-05-28},
urldate = {2024-04-30},
journal = {Journal of Imaging},
volume = {10},
number = {5},
abstract = {This paper introduces a self-attention Vision Transformer model specifically developed for classifying breast cancer in histology images. We examine various training strategies and configurations, including pretraining, dimension resizing, data augmentation and color normalization strategies, patch overlap, and patch size configurations, in order to evaluate their impact on the effectiveness of the histology image classification. Additionally, we provide evidence for the increase in effectiveness gathered through geometric and color data augmentation techniques. We primarily utilize the BACH dataset to train and validate our methods and models, but we also test them on two additional datasets, BRACS and AIDPATH, to verify their generalization capabilities. Our model, developed from a transformer pretrained on ImageNet, achieves an accuracy rate of 0.91 on the BACH dataset, 0.74 on the BRACS dataset, and 0.92 on the AIDPATH dataset. Using a model based on the prostate small and prostate medium HistoEncoder models, we achieve accuracy rates of 0.89 and 0.86, respectively. Our results suggest that pretraining on large-scale general datasets like ImageNet is advantageous. We also show the potential benefits of using domain-specific pretraining datasets, such as extensive histopathological image collections as in HistoEncoder, though not yet with clear advantages.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Fiorin, Alessio; Pablo, Carlos López; Lejeune, Marylène; Siraj, Ameer Hamza; Mea, Vincenzo Della
Enhancing AI Research for Breast Cancer: A Comprehensive Review of Tumor-Infiltrating Lymphocyte Datasets Journal Article
In: Journal of Imaging Informatics in Medicine, 2024, ISSN: 2948-2933.
@article{Fiorin2024,
title = {Enhancing AI Research for Breast Cancer: A Comprehensive Review of Tumor-Infiltrating Lymphocyte Datasets},
author = {Alessio Fiorin and Carlos López Pablo and Marylène Lejeune and Ameer Hamza Siraj and Vincenzo Della Mea},
url = {https://doi.org/10.1007/s10278-024-01043-8},
doi = {10.1007/s10278-024-01043-8},
issn = {2948-2933},
year = {2024},
date = {2024-05-01},
journal = {Journal of Imaging Informatics in Medicine},
abstract = {The field of immunology is fundamental to our understanding of the intricate dynamics of the tumor microenvironment. In particular, tumor-infiltrating lymphocyte (TIL) assessment emerges as an essential aspect in breast cancer cases. To gain comprehensive insights, the quantification of TILs through computer-assisted pathology (CAP) tools has become a prominent approach, employing advanced artificial intelligence models based on deep learning techniques. The successful recognition of TILs requires the models to be trained, a process that demands access to annotated datasets. Unfortunately, this task is hampered not only by the scarcity of such datasets, but also by the time-consuming nature of the annotation phase required to create them. Our review endeavors to examine publicly accessible datasets pertaining to the TIL domain and thereby become a valuable resource for the TIL community. The overall aim of the present review is thus to make it easier to train and validate current and upcoming CAP tools for TIL assessment by inspecting and evaluating existing publicly available online datasets.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Soprano, Michael; Roitero, Kevin; Barbera, David La; Ceolin, Davide; Spina, Damiano; Demartini, Gianluca; Mizzaro, Stefano
Cognitive Biases in Fact-Checking and Their Countermeasures: A Review Journal Article
In: Information Processing & Management, vol. 61, no. 3, pp. 103672, 2024, ISSN: 0306-4573.
@article{SOPRANO2024103672,
title = {Cognitive Biases in Fact-Checking and Their Countermeasures: A Review},
author = {Michael Soprano and Kevin Roitero and David La Barbera and Davide Ceolin and Damiano Spina and Gianluca Demartini and Stefano Mizzaro},
url = {https://www.sciencedirect.com/science/article/pii/S0306457324000323},
doi = {10.1016/j.ipm.2024.103672},
issn = {0306-4573},
year = {2024},
date = {2024-02-11},
urldate = {2024-01-01},
journal = {Information Processing & Management},
volume = {61},
number = {3},
pages = {103672},
abstract = {The increasing amount of misinformation spread online every day is a huge threat to society. Organizations and researchers are working to counter this misinformation plague. In this setting, human assessors are indispensable to correctly identify, assess, and/or revise the truthfulness of information items, i.e., to perform the fact-checking activity. Assessors, as humans, are subject to systematic errors that might interfere with their fact-checking activity. Among such errors, cognitive biases are those due to the limits of human cognition. Although biases help to minimize the cost of making mistakes, they skew assessments away from an objective perception of information. Cognitive biases, hence, are particularly frequent and critical, and can cause errors that have a huge potential impact as they propagate not only in the community, but also in the datasets used to train automatic and semi-automatic machine learning models to fight misinformation. In this work, we present a review of the cognitive biases which might occur during the fact-checking process. In more detail, inspired by PRISMA – a methodology used for systematic literature reviews – we manually derive a list of 221 cognitive biases that may affect human assessors. Then, we select the 39 biases that might manifest during the fact-checking process, we group them into categories, and we provide a description. Finally, we present a list of 11 countermeasures that can be adopted by researchers, practitioners, and organizations to limit the effect of the identified cognitive biases on the fact-checking activity.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Baroni, Giulia L.; Rasotto, Laura; Roitero, Kevin; Siraj, Ameer Hamza; Mea, V. Della
Vision Transformers for Breast Cancer Histology Image Classification Proceedings Article
In: Foresti, Gian Luca; Fusiello, Andrea; Hancock, Edwin (Ed.): Image Analysis and Processing - ICIAP 2023 Workshops, pp. 15–26, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-51026-7.
@inproceedings{10.1007/978-3-031-51026-7_2,
title = {Vision Transformers for Breast Cancer Histology Image Classification},
author = {Giulia L. Baroni and Laura Rasotto and Kevin Roitero and Ameer Hamza Siraj and V. Della Mea},
editor = {Gian Luca Foresti and Andrea Fusiello and Edwin Hancock},
doi = {10.1007/978-3-031-51026-7_2},
isbn = {978-3-031-51026-7},
year = {2024},
date = {2024-01-21},
urldate = {2024-01-01},
booktitle = {Image Analysis and Processing - ICIAP 2023 Workshops},
pages = {15–26},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {We propose a self-attention Vision Transformer (ViT) model tailored for breast cancer histology image classification. The proposed architecture uses a stack of transformer layers, with each layer consisting of a multi-head self-attention mechanism and a position-wise feed-forward network, and it is trained with different strategies and configurations, including pretraining, resize dimension, data augmentation, patch overlap, and patch size, to investigate their impact on performance on the histology image classification task. Experimental results show that pretraining on ImageNet and using geometric and color data augmentation techniques significantly improve the model's accuracy on the task. Additionally, a patch size of 16 × 16 and no patch overlap were found to be optimal for this task. These findings provide valuable insights for the design of future ViT-based models for similar image classification tasks.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
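To make the setup described in the abstract above more concrete (ImageNet pretraining, 16 × 16 patches), here is a hedged sketch using the timm library; the model name, class count, and input size are assumptions for illustration, not the paper's exact configuration.

# Hypothetical configuration; not the authors' actual training code.
import timm
import torch

model = timm.create_model(
    "vit_base_patch16_224",  # ViT with 16x16 patches and 224-pixel inputs
    pretrained=True,         # start from ImageNet weights
    num_classes=4,           # e.g., the four BACH histology classes
)

images = torch.randn(2, 3, 224, 224)  # dummy batch of normalized images
logits = model(images)                # shape: (2, 4)
print(logits.shape)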
Ros, Francesca Da; Gaspero, Luca Di; Roitero, Kevin; Barbera, David La; Mizzaro, Stefano; Mea, Vincenzo Della; Valent, Francesca; Deroma, Laura
Supporting Fair and Efficient Emergency Medical Services in a Large Heterogeneous Region Journal Article
In: Journal of Healthcare Informatics Research, 2024, ISSN: 2509-498X.
@article{DaRos2024,
title = {Supporting Fair and Efficient Emergency Medical Services in a Large Heterogeneous Region},
author = {Francesca Da Ros and Luca Di Gaspero and Kevin Roitero and David La Barbera and Stefano Mizzaro and Vincenzo Della Mea and Francesca Valent and Laura Deroma},
url = {https://doi.org/10.1007/s41666-023-00154-1},
doi = {10.1007/s41666-023-00154-1},
issn = {2509-498X},
year = {2024},
date = {2024-01-09},
urldate = {2024-01-09},
journal = {Journal of Healthcare Informatics Research},
abstract = {Emergency Medical Services (EMS) are crucial in delivering timely and effective medical care to patients in need. However, the complex and dynamic nature of operations poses challenges for decision-making processes at strategic, tactical, and operational levels. This paper proposes an action-driven strategy for EMS management, employing a multi-objective optimizer and a simulator to evaluate potential outcomes of decisions. The approach combines historical data with dynamic simulations and multi-objective optimization techniques to inform decision-makers and improve the overall performance of the system. The research focuses on the Friuli Venezia Giulia region in north-eastern Italy. The region encompasses various landscapes and demographic situations that challenge fairness and equity in service access. Similar challenges are faced in other regions with comparable characteristics. The Decision Support System developed in this work accurately models the real-world system and provides valuable feedback and suggestions to EMS professionals, enabling them to make informed decisions and enhance the efficiency and fairness of the system.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2023
Demartini, Gianluca; Roitero, Kevin; Mizzaro, Stefano
Data Bias Management Journal Article
In: Commun. ACM, vol. 67, no. 1, pp. 28–32, 2023, ISSN: 0001-0782.
@article{10.1145/3611641,
title = {Data Bias Management},
author = {Gianluca Demartini and Kevin Roitero and Stefano Mizzaro},
url = {https://doi.org/10.1145/3611641},
doi = {10.1145/3611641},
issn = {0001-0782},
year = {2023},
date = {2023-12-21},
urldate = {2023-12-01},
journal = {Commun. ACM},
volume = {67},
number = {1},
pages = {28–32},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {Envisioning a unique approach toward bias and fairness research.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Soprano, Michael; Roitero, Kevin; Mea, Vincenzo Della; Mizzaro, Stefano
Towards a Conversational-Based Agent for Health Services Proceedings Article
In: Falchi, Fabrizio; Giannotti, Fosca; Monreale, Anna; Boldrini, Chiara; Rinzivillo, Salvatore; Colantonio, Sara (Ed.): Proceedings of the Italia Intelligenza Artificiale - Thematic Workshops co-located with the 3rd CINI National Lab AIIS Conference on Artificial Intelligence, pp. 278–283, CEUR-WS.org, Pisa, Italy, 2023.
@inproceedings{DBLP:conf/italia2023/Soprano23,
title = {Towards a Conversational-Based Agent for Health Services},
author = {Michael Soprano and Kevin Roitero and Vincenzo Della Mea and Stefano Mizzaro},
editor = {Fabrizio Falchi and Fosca Giannotti and Anna Monreale and Chiara Boldrini and Salvatore Rinzivillo and Sara Colantonio},
url = {https://ceur-ws.org/Vol-3486/96.pdf},
year = {2023},
date = {2023-09-20},
urldate = {2023-01-01},
booktitle = {Proceedings of the Italia Intelligenza Artificiale - Thematic Workshops co-located with the 3rd CINI National Lab AIIS Conference on Artificial Intelligence},
volume = {3486},
pages = {278–283},
publisher = {CEUR-WS.org},
address = {Pisa, Italy},
series = {CEUR Workshop Proceedings},
abstract = {Conversational agents provide new modalities to access and interact with services and applications. Recently, their popularity has surged again due to the advancements in language models. Such agents have been adopted in various fields such as healthcare and education, yet they have received little attention in public administration. As a practical use case, we describe a service of the portal that provides citizens of the Italian region of Friuli-Venezia Giulia with services related to their own Electronic Health Records. The service considered allows them to search for the available doctors and pediatricians in the region's municipalities. We rely on the use case described to propose a model for a conversational agent-based access modality. The model proposed allows us to lay the foundation for more advanced chatbot-like implementations which will also use alternative input modalities, such as voice-based communication.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Barbera, David La; Soprano, Michael; Roitero, Kevin; Maddalena, Eddy; Mizzaro, Stefano
Fact-Checking at Scale with Crowdsourcing: Experiments and Lessons Learned Proceedings Article
In: Nardini, Franco Maria; Tonelotto, Nicola; Faggioli, Guglielmo; Ferrara, Antonio (Ed.): Proceedings of the 13th Italian Information Retrieval Workshop, pp. 85–90, CEUR-WS.org, Pisa, Italy, 2023.
@inproceedings{DBLP:conf/iir/BarberaSRMM23,
title = {Fact-Checking at Scale with Crowdsourcing: Experiments and Lessons Learned},
author = {David La Barbera and Michael Soprano and Kevin Roitero and Eddy Maddalena and Stefano Mizzaro},
editor = {Franco Maria Nardini and Nicola Tonelotto and Guglielmo Faggioli and Antonio Ferrara},
url = {https://ceur-ws.org/Vol-3448/paper-18.pdf},
year = {2023},
date = {2023-08-26},
urldate = {2023-08-15},
booktitle = {Proceedings of the 13th Italian Information Retrieval Workshop},
volume = {3448},
pages = {85–90},
publisher = {CEUR-WS.org},
address = {Pisa, Italy},
series = {CEUR Workshop Proceedings},
abstract = {In this paper, we present our journey in exploring the use of crowdsourcing for fact-checking. We discuss our early experiments aimed towards the identification of the best possible setting for misinformation assessment using crowdsourcing. Our results indicate that the crowd can effectively address misinformation at scale, showing some degree of correlation with experts. We also highlight the influence of worker background on the quality of truthfulness assessments.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Barbera, David La; Soprano, Michael; Demartini, Gianluca; Mizzaro, Stefano; Sakai, Tetsuya
How Many Crowd Workers Do I Need? On Statistical Power When Crowdsourcing Relevance Judgments Journal Article
In: ACM Transactions on Information Systems, 2023, ISSN: 1046-8188, (Journal Ranks: Journal Citation Reports (JCR) Q1 (2021), Scimago (SJR) Q1 (2021)).
@article{10.1145/3597201,
title = {How Many Crowd Workers Do I Need? On Statistical Power When Crowdsourcing Relevance Judgments},
author = {Kevin Roitero and David La Barbera and Michael Soprano and Gianluca Demartini and Stefano Mizzaro and Tetsuya Sakai},
doi = {10.1145/3597201},
issn = {1046-8188},
year = {2023},
date = {2023-08-18},
urldate = {2023-01-01},
journal = {ACM Transactions on Information Systems},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {To scale the size of Information Retrieval collections, crowdsourcing has become a common way to collect relevance judgments at scale. Crowdsourcing experiments usually employ 100-10,000 workers, but such a number is often decided in a heuristic way. The downside is that the resulting dataset does not have any guarantee of meeting predefined statistical requirements, such as having enough statistical power to distinguish in a statistically significant way between the relevance of two documents. We propose a methodology adapted from the literature on sound topic set size design, based on t-test and ANOVA, which aims at guaranteeing that the resulting dataset meets a predefined set of statistical requirements. We validate our approach on several public datasets. Our results show that we can reliably estimate the recommended number of workers needed to achieve statistical power, and that such estimation is dependent on the topic, while the effect of the relevance scale is limited. Furthermore, we found that such estimation is dependent on worker features such as agreement. Finally, we describe a set of practical estimation strategies that can be used to estimate the worker set size, and we also provide results on the estimation of document set sizes.},
note = {Journal Ranks: Journal Citation Reports (JCR) Q1 (2021), Scimago (SJR) Q1 (2021)},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
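For intuition on the kind of computation the abstract above alludes to, a generic two-sample t-test power analysis can be run with statsmodels; the effect size, significance level, and target power below are example inputs, and the paper's actual methodology (adapted from topic-set-size design) is more involved.

# Generic power analysis, not the paper's method; inputs are assumptions.
from statsmodels.stats.power import TTestIndPower

n_workers = TTestIndPower().solve_power(
    effect_size=0.5,  # assumed Cohen's d between the two conditions
    alpha=0.05,       # significance level
    power=0.8,        # desired statistical power
)
print(f"Workers needed per group: {n_workers:.0f}")  # about 64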
Xie, Haoyu; Maddalena, Eddy; Qarout, Rehab; Checco, Alessandro
The Dark Side of Recruitment in Crowdsourcing: Ethics and Transparency in Micro-Task Marketplaces Journal Article
In: Computer Supported Cooperative Work (CSCW), vol. 32, no. 3, pp. 439–474, 2023, ISSN: 1573-7551.
@article{Xie2023b,
title = {The Dark Side of Recruitment in Crowdsourcing: Ethics and Transparency in Micro-Task Marketplaces},
author = {Haoyu Xie and Eddy Maddalena and Rehab Qarout and Alessandro Checco},
url = {https://doi.org/10.1007/s10606-023-09464-9},
doi = {10.1007/s10606-023-09464-9},
issn = {1573-7551},
year = {2023},
date = {2023-07-28},
urldate = {2023-09-01},
journal = {Computer Supported Cooperative Work (CSCW)},
volume = {32},
number = {3},
pages = {439-474},
abstract = {Micro-task crowdsourcing marketplaces like Figure Eight (F8) connect a large pool of workers to employers through a single online platform, by aggregating multiple crowdsourcing platforms (channels) under a unique system. This paper investigates the F8 channels' demographic distribution and reward schemes by analysing more than 53k crowdsourcing tasks over four years, collecting survey data and scraping marketplace metadata. We reveal a heterogeneous per-channel demographic distribution, and an opaque channel commission scheme that varies over time and is not communicated to the employer when launching a task: workers will often receive a smaller payment than expected by the employer. In addition, the impact of channel commission schemes on the relationship between requesters and crowdworkers is explored. These observations uncover important issues regarding the ethics, reliability, and transparency of crowdsourced experiments when using these kinds of marketplaces, especially for academic research.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Maddalena, Eddy; Ibáñez, Luis-Daniel; Reeves, Neal; Simperl, Elena
Qrowdsmith: Enhancing Paid Microtask Crowdsourcing with Gamification and Furtherance Incentives Journal Article
In: ACM Trans. Intell. Syst. Technol., 2023, ISSN: 2157-6904, (Just Accepted).
@article{10.1145/3604940,
title = {Qrowdsmith: Enhancing Paid Microtask Crowdsourcing with Gamification and Furtherance Incentives},
author = {Eddy Maddalena and Luis-Daniel Ibáñez and Neal Reeves and Elena Simperl},
url = {https://doi.org/10.1145/3604940},
doi = {10.1145/3604940},
issn = {2157-6904},
year = {2023},
date = {2023-06-01},
journal = {ACM Trans. Intell. Syst. Technol.},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {Microtask crowdsourcing platforms are social intelligence systems in which volunteers, called crowdworkers, complete small, repetitive tasks in return for a small fee. Beyond payments, task requesters are considering non-monetary incentives such as points, badges and other gamified elements to increase performance and improve crowdworker experience. In this paper, we present Qrowdsmith, a platform for gamifying microtask crowdsourcing. To design the system, we explore empirically a range of gamified and financial incentives and analyse their impact on how efficient, effective, and reliable the results are. To maintain participation over time and save costs, we propose furtherance incentives, which are offered to crowdworkers to encourage additional contributions beyond the fee agreed upfront. In a series of controlled experiments, we find that while gamification can work as a furtherance incentive, it negatively impacts crowdworkers' performance, both in terms of the quantity and quality of work, as compared to a baseline where they can continue to contribute voluntarily. Gamified incentives are also less effective than paid bonus equivalents. Our results contribute to the understanding of how best to encourage engagement in microtask crowdsourcing activities, and to the design of better crowd intelligence systems.},
note = {Just Accepted},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Roitero, Kevin; Martinuzzi, Andrea; Armellin, Maria Teresa; Paparella, Gabriella; Maniero, Alberto; Mea, Vincenzo Della
Automated ICF Coding of Rehabilitation Notes for Low-Resource Languages via Continual Training of Language Models Journal Article
In: Studies in Health Technology and Informatics, vol. 302, pp. 763–767, 2023, ISSN: 1879-8365.
@article{Roitero2023,
title = {Automated ICF Coding of Rehabilitation Notes for Low-Resource Languages via Continual Training of Language Models},
author = {Kevin Roitero and Andrea Martinuzzi and Maria Teresa Armellin and Gabriella Paparella and Alberto Maniero and Vincenzo Della Mea},
doi = {10.3233/SHTI230262},
issn = {1879-8365},
year = {2023},
date = {2023-05-18},
urldate = {2023-05-18},
journal = {Studies in Health Technology and Informatics},
volume = {302},
pages = {763–767},
publisher = {IOS Press},
abstract = {The coding of medical documents, and in particular of rehabilitation notes, using the International Classification of Functioning, Disability and Health (ICF) is a difficult task showing low agreement among experts. Such difficulty is mainly caused by the specific terminology that needs to be used for the task. In this paper, we address the task by developing a model based on a large language model, BERT. By leveraging continual training of such a model using ICF textual descriptions, we are able to effectively encode rehabilitation notes expressed in Italian, an under-resourced language.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
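A hedged sketch of the general technique named above (continuing masked-language-model training of BERT on domain text) follows, using Hugging Face transformers; the model name and the Italian sentence are placeholders, not the paper's actual setup.

# Placeholder model and text; a single illustrative MLM training step.
import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

model_name = "bert-base-multilingual-cased"  # assumed; handles Italian text
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMaskedLM.from_pretrained(model_name)

# Invented ICF-like sentence; one token is masked by hand for determinism.
batch = tokenizer("Funzioni della mobilità delle articolazioni.", return_tensors="pt")
masked_position = 2
labels = torch.full_like(batch["input_ids"], -100)   # ignore unmasked tokens
labels[0, masked_position] = batch["input_ids"][0, masked_position]
batch["input_ids"][0, masked_position] = tokenizer.mask_token_id

loss = model(**batch, labels=labels).loss  # masked-language-model loss
loss.backward()                            # one continual-pretraining step
print(float(loss))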
Ceolin, Davide; Roitero, Kevin; Guo, Furong
Predicting Crowd Workers Performance: An Information Quality Case Proceedings Article
In: Garrigós, Irene; Rodríguez, Juan Manuel Murillo; Wimmer, Manuel (Ed.): Web Engineering, pp. 75–90, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-34444-2.
@inproceedings{10.1007/978-3-031-34444-2_6,
title = {Predicting Crowd Workers Performance: An Information Quality Case},
author = {Davide Ceolin and Kevin Roitero and Furong Guo},
editor = {Irene Garrigós and Juan Manuel Murillo Rodríguez and Manuel Wimmer},
doi = {10.1007/978-3-031-34444-2_6},
isbn = {978-3-031-34444-2},
year = {2023},
date = {2023-01-01},
booktitle = {Web Engineering},
pages = {75–90},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {Supervised machine learning tasks require human-labeled data. Crowdsourcing allows scaling up the labeling process, but the quality of the labels obtained can vary. To address this limitation, we propose methods for predicting label quality based on worker trajectories, i.e., on the sequence of documents workers explore during their crowdsourcing tasks. Trajectories represent a lightweight and non-intrusive form of worker behavior signal. We base our analysis on previously collected datasets composed of thousands of assessment data records including information such as workers' trajectories, workers' assessments, and experts' assessments. We model such behavior sequences as embeddings, to facilitate their management. Then, we: (1) use supervised methods to predict worker performance using a given ground truth; (2) perform an unsupervised analysis to provide insight into crowdsourcing quality when no gold standard is available. We test several supervised approaches which all beat the baseline we propose. Also, we identify significant differences between trajectory clusters in terms of assessments and worker performance. The trajectory-based analysis is a promising direction for non-intrusive worker performance evaluation.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
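The supervised step described above (predicting worker performance from trajectory embeddings against a ground truth) can be illustrated with a generic scikit-learn pipeline; the embeddings and labels below are random placeholders, and logistic regression is an assumed stand-in for whichever models the paper evaluates.

# Random placeholder data; not the paper's datasets or models.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

rng = np.random.default_rng(0)
trajectory_embeddings = rng.normal(size=(200, 32))  # one 32-d vector per worker
high_quality = rng.integers(0, 2, size=200)         # 1 = agrees with experts

clf = LogisticRegression(max_iter=1000)
scores = cross_val_score(clf, trajectory_embeddings, high_quality, cv=5)
print(f"Mean CV accuracy: {scores.mean():.2f}")  # about 0.5 on random data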
Roitero, Kevin; Portelli, Beatrice; Serra, Giuseppe; Mea, Vincenzo Della; Mizzaro, Stefano; Cerro, Gianni; Vitelli, Michele; Molinara, Mario
Detection of Wastewater Pollution Through Natural Language Generation With a Low-Cost Sensing Platform Journal Article
In: IEEE Access, vol. 11, pp. 50272–50284, 2023, ISSN: 2169-3536.
@article{10129181,
title = {Detection of Wastewater Pollution Through Natural Language Generation With a Low-Cost Sensing Platform},
author = {Kevin Roitero and Beatrice Portelli and Giuseppe Serra and Vincenzo Della Mea and Stefano Mizzaro and Gianni Cerro and Michele Vitelli and Mario Molinara},
doi = {10.1109/ACCESS.2023.3277535},
issn = {2169-3536},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
journal = {IEEE Access},
volume = {11},
pages = {50272–50284},
abstract = {The detection of contaminants in several environments (e.g., air, water, sewage systems) is of paramount importance to protect people and predict possible dangerous circumstances. Most works do this using classical Machine Learning tools that act on the acquired measurement data. This paper introduces two main elements: a low-cost platform to acquire, pre-process, and transmit data to classify contaminants in wastewater; and a novel classification approach to classify contaminants in wastewater, based on deep learning and the transformation of raw sensor data into natural language metadata. The proposed solution presents clear advantages against state-of-the-art systems in terms of higher effectiveness and reasonable efficiency. The main disadvantage of the proposed approach is that it relies on knowing the injection time, i.e., the instant in time when the contaminant is injected into the wastewater. For this reason, the developed system also includes a finite state machine tool able to infer the exact time instant when the substance is injected. The entire system is presented and discussed in detail. Furthermore, several variants of the proposed processing technique are also presented to assess the sensitivity to the number of used samples and the corresponding promptness/computational burden of the system. The lowest accuracy obtained by our technique is 91.4%, which is significantly higher than the 81.0% accuracy reached by the best baseline method.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Amigó, Enrique; Deldjoo, Yashar; Mizzaro, Stefano; Bellogín, Alejandro
A unifying and general account of fairness measurement in recommender systems Journal Article
In: Information Processing & Management, vol. 60, no. 1, pp. 103115, 2023, ISSN: 0306-4573.
@article{AMIGO2023103115,
title = {A unifying and general account of fairness measurement in recommender systems},
author = {Enrique Amigó and Yashar Deldjoo and Stefano Mizzaro and Alejandro Bellogín},
url = {https://www.sciencedirect.com/science/article/pii/S0306457322002163},
doi = {10.1016/j.ipm.2022.103115},
issn = {0306-4573},
year = {2023},
date = {2023-01-01},
journal = {Information Processing & Management},
volume = {60},
number = {1},
pages = {103115},
abstract = {Fairness is fundamental to all information access systems, including recommender systems. However, the landscape of fairness definition and measurement is quite scattered with many competing definitions that are partial and often incompatible. There is much work focusing on specific – and different – notions of fairness and there exist dozens of metrics of fairness in the literature, many of them redundant and most of them incompatible. In contrast, to our knowledge, there is no formal framework that covers all possible variants of fairness and allows developers to choose the most appropriate variant depending on the particular scenario. In this paper, we aim to define a general, flexible, and parameterizable framework that covers a whole range of fairness evaluation possibilities. Instead of modeling the metrics based on an abstract definition of fairness, the distinctive feature of this study compared to the current state of the art is that we start from the metrics applied in the literature to obtain a unified model by generalization. The framework is grounded on a general work hypothesis: interpreting the space of users and items as a probabilistic sample space, two fundamental measures in information theory (Kullback–Leibler Divergence and Mutual Information) can capture the majority of possible scenarios for measuring fairness on recommender system outputs. In addition, earlier research on fairness in recommender systems could be viewed as single-sided, trying to optimize some form of equity across either user groups or provider/procurer groups, without considering the user/item space in conjunction, thereby overlooking/disregarding the interplay between user and item groups. Instead, our framework includes the notion of statistical independence between user and item groups. We finally validate our approach experimentally on both synthetic and real data according to a wide range of state-of-the-art recommendation algorithms and real-world data sets, showing that with our framework we can measure fairness in a general, uniform, and meaningful way.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
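The two information-theoretic measures the framework above is grounded on (Kullback–Leibler divergence and mutual information) can be computed with standard scientific-Python tools; the group distributions below are invented examples, not the paper's experimental data.

# Invented distributions for illustration only.
import numpy as np
from scipy.stats import entropy
from sklearn.metrics import mutual_info_score

observed = np.array([0.50, 0.30, 0.20])  # exposure share per item group
ideal = np.array([1 / 3, 1 / 3, 1 / 3])  # fair target distribution

kl = entropy(observed, ideal)  # KL(observed || ideal); 0 means perfectly fair
print(f"KL divergence: {kl:.4f}")

# Mutual information between user-group and item-group assignments:
user_groups = [0, 0, 1, 1, 1, 0, 1, 0]
item_groups = [0, 1, 1, 1, 0, 0, 1, 0]
mi = mutual_info_score(user_groups, item_groups)  # 0 = statistical independence
print(f"Mutual information: {mi:.4f}")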
Amigó, Enrique; Gonzalo, Julio; Mizzaro, Stefano
What is My Problem? Identifying Formal Tasks and Metrics in Data Mining on the Basis of Measurement Theory Journal Article
In: IEEE Transactions on Knowledge and Data Engineering, vol. 35, no. 2, pp. 2147–2157, 2023.
@article{9528028,
title = {What is My Problem? Identifying Formal Tasks and Metrics in Data Mining on the Basis of Measurement Theory},
author = {Enrique Amigó and Julio Gonzalo and Stefano Mizzaro},
doi = {10.1109/TKDE.2021.3109823},
year = {2023},
date = {2023-01-01},
journal = {IEEE Transactions on Knowledge and Data Engineering},
volume = {35},
number = {2},
pages = {2147–2157},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2022
Brand, Erik; Roitero, Kevin; Soprano, Michael; Rahimi, Afshin; Demartini, Gianluca
A Neural Model to Jointly Predict and Explain Truthfulness of Statements Journal Article
In: J. Data and Information Quality, 2022, ISSN: 1936-1955, (Just Accepted).
@article{10.1145/3546917,
title = {A Neural Model to Jointly Predict and Explain Truthfulness of Statements},
author = {Erik Brand and Kevin Roitero and Michael Soprano and Afshin Rahimi and Gianluca Demartini},
url = {https://doi.org/10.1145/3546917},
doi = {10.1145/3546917},
issn = {1936-1955},
year = {2022},
date = {2022-05-01},
journal = {J. Data and Information Quality},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {Automated fact-checking (AFC) systems exist to combat disinformation; however, their complexity usually makes them opaque to the end user, making it difficult to foster trust in the system. In this paper, we introduce the E-BART model with the hope of making progress on this front. E-BART is able to provide a veracity prediction for a claim, and jointly generate a human-readable explanation for this decision. We show that E-BART is competitive with the state-of-the-art on the e-FEVER and e-SNLI tasks. In addition, we validate the joint-prediction architecture by showing 1) that generating explanations does not significantly impede the model from performing well in its main task of veracity prediction, and 2) that predicted veracity and explanations are more internally coherent when generated jointly than separately. We also calibrate the E-BART model, allowing the output of the final model to be correctly interpreted as the confidence of correctness. Finally, we conduct an extensive human evaluation of the impact of generated explanations and observe that explanations increase human ability to spot misinformation and make people more skeptical about claims, and that explanations generated by E-BART are competitive with ground truth explanations.},
note = {Just Accepted},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Qu, Yunke; Barbera, David La; Roitero, Kevin; Mizzaro, Stefano; Spina, Damiano; Demartini, Gianluca
Combining Human and Machine Confidence in Truthfulness Assessment Journal Article
In: J. Data and Information Quality, 2022, ISSN: 1936-1955, (Just Accepted).
@article{10.1145/3546916,
title = {Combining Human and Machine Confidence in Truthfulness Assessment},
author = {Yunke Qu and David La Barbera and Kevin Roitero and Stefano Mizzaro and Damiano Spina and Gianluca Demartini},
url = {https://doi.org/10.1145/3546916},
doi = {10.1145/3546916},
issn = {1936-1955},
year = {2022},
date = {2022-05-01},
journal = {J. Data and Information Quality},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {Automatically detecting online misinformation at scale is a challenging and interdisciplinary problem. Deciding what is to be considered truthful information is sometimes controversial and difficult also for educated experts. As the scale of the problem increases, human-in-the-loop approaches to truthfulness that combine both the scalability of machine learning (ML) and the accuracy of human contributions have been considered. In this work we look at the potential to automatically combine machine-based systems with human-based systems. The former exploit supervised ML approaches; the latter involve either crowd workers (i.e., human non-experts) or human experts. Since both ML and crowdsourcing approaches can produce a score indicating the level of confidence on their truthfulness judgments (either algorithmic or self-reported, respectively), we address the question of whether it is feasible to make use of such confidence scores to effectively and efficiently combine three approaches: (i) machine-based methods; (ii) crowd workers, and (iii) human experts. The three approaches differ significantly as they range from available, cheap, fast, scalable, but less accurate to scarce, expensive, slow, not scalable, but highly accurate.},
note = {Just Accepted},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Soprano, Michael; Roitero, Kevin; Bona, Francesco Bombassei De; Mizzaro, Stefano
Crowd_Frame: A Simple and Complete Framework to Deploy Complex Crowdsourcing Tasks Off-the-Shelf Proceedings Article
In: Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining, pp. 1605–1608, Association for Computing Machinery, Virtual Event, AZ, USA, 2022, ISBN: 9781450391320.
@inproceedings{conference-paper-wsdm2022,
title = {Crowd_Frame: A Simple and Complete Framework to Deploy Complex Crowdsourcing Tasks Off-the-Shelf},
author = {Michael Soprano and Kevin Roitero and Francesco Bombassei De Bona and Stefano Mizzaro},
doi = {10.1145/3488560.3502182},
isbn = {9781450391320},
year = {2022},
date = {2022-01-01},
booktitle = {Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining},
pages = {1605–1608},
publisher = {Association for Computing Machinery},
address = {Virtual Event, AZ, USA},
series = {WSDM '22},
abstract = {Due to their relatively low cost and ability to scale, crowdsourcing-based approaches are widely used to collect a large amount of human-annotated data. To this aim, multiple crowdsourcing platforms exist, where requesters can upload tasks and workers can carry them out and obtain payment in return. Such platforms share a task design and deployment workflow that is often counter-intuitive and cumbersome. To address this issue, we propose Crowd_Frame, a simple and complete framework which makes it possible to develop and deploy diverse types of complex crowdsourcing tasks in an easy and customizable way. We show the abilities of the proposed framework and we make it available to researchers and practitioners.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Checco, Alessandro; Mizzaro, Stefano; Demartini, Gianluca
Preferences on a Budget: Prioritizing Document Pairs When Crowdsourcing Relevance Judgments Proceedings Article
In: Proceedings of the ACM Web Conference 2022, pp. 319–327, Association for Computing Machinery, Virtual Event, Lyon, France, 2022, ISBN: 9781450390965.
@inproceedings{10.1145/3485447.3511960,
title = {Preferences on a Budget: Prioritizing Document Pairs When Crowdsourcing Relevance Judgments},
author = {Kevin Roitero and Alessandro Checco and Stefano Mizzaro and Gianluca Demartini},
url = {https://doi.org/10.1145/3485447.3511960},
doi = {10.1145/3485447.3511960},
isbn = {9781450390965},
year = {2022},
date = {2022-01-01},
booktitle = {Proceedings of the ACM Web Conference 2022},
pages = {319–327},
publisher = {Association for Computing Machinery},
address = {Virtual Event, Lyon, France},
series = {WWW '22},
abstract = {In Information Retrieval (IR) evaluation, preference judgments are collected by presenting to the assessors a pair of documents and asking them to select which of the two, if any, is the most relevant. This is an alternative to the classic relevance judgment approach, in which human assessors judge the relevance of a single document on a scale; such an alternative allows assessors to make relative rather than absolute judgments of relevance. While preference judgments are easier for human assessors to perform, the number of possible document pairs to be judged is usually so high that it makes it unfeasible to judge them all. Thus, following a similar idea to pooling strategies for single-document relevance judgments, where the goal is to sample the most useful documents to be judged, in this work we focus on analyzing alternative ways to sample document pairs to judge, in order to maximize the value of a fixed number of preference judgments that can feasibly be collected. Such value is defined as how well we can evaluate IR systems given a budget, that is, a fixed number of human preference judgments that may be collected. By relying on several datasets featuring relevance judgments gathered by means of experts and crowdsourcing, we experimentally compare alternative strategies to select document pairs and show how different strategies lead to different IR evaluation result quality levels. Our results show that, by using the appropriate procedure, it is possible to achieve good IR evaluation results with a limited number of preference judgments, thus confirming the feasibility of using preference judgments to create IR evaluation collections.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Barbera, David La; Roitero, Kevin; Mackenzie, Joel; Spina, Damiano; Demartini, Gianluca; Mizzaro, Stefano
BUM at CheckThat! 2022: A Composite Deep Learning Approach to Fake News Detection using Evidence Retrieval Proceedings Article
In: Faggioli, Guglielmo; Ferro, Nicola; Hanbury, Allan; Potthast, Martin (Ed.): Working Notes of CLEF 2022 - Conference and Labs of the Evaluation Forum, Bologna, Italy, 2022.
@inproceedings{clef-checkthat:2022:task3:La_Barbera_BUM,
title = {BUM at CheckThat! 2022: A Composite Deep Learning Approach to Fake News Detection using Evidence Retrieval},
author = {David La Barbera and Kevin Roitero and Joel Mackenzie and Damiano Spina and Gianluca Demartini and Stefano Mizzaro},
editor = {Guglielmo Faggioli and Nicola Ferro and Allan Hanbury and Martin Potthast},
year = {2022},
date = {2022-01-01},
booktitle = {Working Notes of CLEF 2022 - Conference and Labs of the Evaluation Forum},
address = {Bologna, Italy},
series = {CLEF 2022},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Draws, Tim; Barbera, David La; Soprano, Michael; Roitero, Kevin; Ceolin, Davide; Checco, Alessandro; Mizzaro, Stefano
The Effects of Crowd Worker Biases in Fact-Checking Tasks Proceedings Article
In: 2022 ACM Conference on Fairness, Accountability, and Transparency, pp. 2114–2124, Association for Computing Machinery, Seoul, Republic of Korea, 2022, ISBN: 9781450393522.
@inproceedings{10.1145/3531146.3534629,
title = {The Effects of Crowd Worker Biases in Fact-Checking Tasks},
author = {Tim Draws and David La Barbera and Michael Soprano and Kevin Roitero and Davide Ceolin and Alessandro Checco and Stefano Mizzaro},
url = {https://doi.org/10.1145/3531146.3534629},
doi = {10.1145/3531146.3534629},
isbn = {9781450393522},
year = {2022},
date = {2022-01-01},
booktitle = {2022 ACM Conference on Fairness, Accountability, and Transparency},
pages = {2114–2124},
publisher = {Association for Computing Machinery},
address = {Seoul, Republic of Korea},
series = {FAccT '22},
abstract = {Due to the increasing amount of information shared online every day, the need for sound and reliable ways of distinguishing between trustworthy and non-trustworthy information is as present as ever. One technique for performing fact-checking at scale is to employ human intelligence in the form of crowd workers. Although earlier work has suggested that crowd workers can reliably identify misinformation, cognitive biases of crowd workers may reduce the quality of truthfulness judgments in this context. We performed a systematic exploratory analysis of publicly available crowdsourced data to identify a set of potential systematic biases that may occur when crowd workers perform fact-checking tasks. Following this exploratory study, we collected a novel data set of crowdsourced truthfulness judgments to validate our hypotheses. Our findings suggest that workers generally overestimate the truthfulness of statements and that different individual characteristics (i.e., their belief in science) and cognitive biases (i.e., the affect heuristic and overconfidence) can affect their annotations. Interestingly, we find that, depending on the general judgment tendencies of workers, their biases may sometimes lead to more accurate judgments.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Ceschia, Sara; Roitero, Kevin; Demartini, Gianluca; Mizzaro, Stefano; Gaspero, Luca Di; Schaerf, Andrea
Task design in complex crowdsourcing experiments: Item assignment optimization Journal Article
In: Computers & Operations Research, pp. 105995, 2022, ISSN: 0305-0548.
@article{CESCHIA2022105995,
title = {Task design in complex crowdsourcing experiments: Item assignment optimization},
author = {Sara Ceschia and Kevin Roitero and Gianluca Demartini and Stefano Mizzaro and Luca Di Gaspero and Andrea Schaerf},
url = {https://www.sciencedirect.com/science/article/pii/S0305054822002295},
doi = {10.1016/j.cor.2022.105995},
issn = {0305-0548},
year = {2022},
date = {2022-01-01},
journal = {Computers & Operations Research},
pages = {105995},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ceolin, Davide; Primiero, Giuseppe; Soprano, Michael; Wielemaker, Jan
Transparent Assessment of Information Quality of Online Reviews Using Formal Argumentation Theory Journal Article
In: Information Systems, vol. 110, pp. 102107, 2022, ISSN: 0306-4379, (Journal Ranks: Journal Citation Reports (JCR) Q2 (2021), Scimago (SJR) Q1 (2021)).
@article{CEOLIN2022102107,
title = {Transparent Assessment of Information Quality of Online Reviews Using Formal Argumentation Theory},
author = {Davide Ceolin and Giuseppe Primiero and Michael Soprano and Jan Wielemaker},
doi = {10.1016/j.is.2022.102107},
issn = {0306-4379},
year = {2022},
date = {2022-01-01},
journal = {Information Systems},
volume = {110},
pages = {102107},
abstract = {Review scores collect users’ opinions in a simple and intuitive manner. However, review scores are also easily manipulable, hence they are often accompanied by explanations. A substantial amount of research has been devoted to ascertaining the quality of reviews, to identify the most useful and authentic scores through explanation analysis. In this paper, we advance the state of the art in review quality analysis. We introduce a rating system to identify review arguments and to define an appropriate weighted semantics through formal argumentation theory. We introduce an algorithm to construct a corresponding graph, based on a selection of weighted arguments, their semantic distance, and the supported ratings. We also provide an algorithm to identify the model of such an argumentation graph, maximizing the overall weight of the admitted nodes and edges. We evaluate these contributions on the Amazon review dataset by McAuley et al. (2015), by comparing the results of our argumentation assessment with the upvotes received by the reviews. Also, we deepen the evaluation by crowdsourcing a multidimensional assessment of reviews and comparing it to the argumentation assessment. Lastly, we perform a user study to evaluate the explainability of our method, i.e., to test whether the automated method we use to assess reviews is understandable by humans. Our method achieves two goals: (1) it identifies reviews that are considered useful, comprehensible, and complete by online users, and does so in an unsupervised manner, and (2) it provides an explanation of quality assessments.},
note = {Journal Ranks: Journal Citation Reports (JCR) Q2 (2021), Scimago (SJR) Q1 (2021)},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Amigó, Enrique; Mizzaro, Stefano; Spina, Damiano
Ranking Interruptus: When Truncated Rankings Are Better and How to Measure That Proceedings Article
In: Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 588–598, Association for Computing Machinery, New York, NY, USA, 2022, ISBN: 9781450387323.
@inproceedings{10.1145/3477495.3532051,
title = {Ranking Interruptus: When Truncated Rankings Are Better and How to Measure That},
author = {Enrique Amigó and Stefano Mizzaro and Damiano Spina},
url = {https://doi.org/10.1145/3477495.3532051},
doi = {10.1145/3477495.3532051},
isbn = {9781450387323},
year = {2022},
date = {2022-01-01},
booktitle = {Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval},
pages = {588–598},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
series = {SIGIR '22},
abstract = {Most information retrieval effectiveness evaluation metrics assume that systems appending irrelevant documents at the bottom of the ranking are as effective as (or not worse than) systems that have a stopping criterion to 'truncate' the ranking at the right position to avoid retrieving those irrelevant documents at the end. It can be argued, however, that such truncated rankings are more useful to the end user. It is thus important to understand how to measure retrieval effectiveness in this scenario. In this paper we provide both theoretical and experimental contributions. We first define formal properties to analyze how effectiveness metrics behave when evaluating truncated rankings. Our theoretical analysis shows that de-facto standard metrics do not satisfy desirable properties to evaluate truncated rankings: only Observational Information Effectiveness (OIE) – a metric based on Shannon's information theory – satisfies them all. We then perform experiments to compare several metrics on nine TREC datasets. According to our experimental results, the most appropriate metrics for truncated rankings are OIE and a novel extension of Rank-Biased Precision that adds a user effort factor penalizing the retrieval of irrelevant documents.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
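To make the evaluation problem above concrete, here is standard Rank-Biased Precision (Moffat and Zobel's metric) next to a hypothetical effort-penalized variant: under plain RBP, padding a truncated ranking with irrelevant documents never lowers the score, while a penalty term makes truncation pay off. The penalized formula is an illustrative assumption, not the extension actually defined in the paper.

def rbp(rels, p=0.8):
    # Standard Rank-Biased Precision over binary relevance values, truncated
    # wherever the system stopped the ranking.
    return (1 - p) * sum(r * p**i for i, r in enumerate(rels))

def rbp_with_effort(rels, p=0.8, beta=0.5):
    # Hypothetical extension (assumption): subtract an effort term that grows
    # with each retrieved irrelevant document, so appending junk lowers the score.
    gain = rbp(rels, p)
    effort = (1 - p) * sum((1 - r) * p**i for i, r in enumerate(rels))
    return gain - beta * effort

truncated = [1, 1, 0]           # system stops after three documents
padded = [1, 1, 0, 0, 0, 0]     # same ranking padded with irrelevant documents
print(rbp(truncated), rbp(padded))                        # identical under plain RBP
print(rbp_with_effort(truncated), rbp_with_effort(padded))  # padding is now penalized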
2021
Brand, Erik; Roitero, Kevin; Soprano, Michael; Demartini, Gianluca
E-BART: Jointly Predicting and Explaining Truthfulness Proceedings Article
In: Augenstein, Isabelle; Papotti, Paolo; Wright, Dustin (Ed.): Proceedings of the 2021 Truth and Trust Online Conference (TTO 2021), Virtual, October 7-8, 2021, pp. 18–27, Hacks/Hackers, 2021.
@inproceedings{conference-paper-tto-2021,
title = {E-BART: Jointly Predicting and Explaining Truthfulness},
author = {Erik Brand and Kevin Roitero and Michael Soprano and Gianluca Demartini},
editor = {Isabelle Augenstein and Paolo Papotti and Dustin Wright},
url = {https://truthandtrustonline.com/wp-content/uploads/2021/10/TTO2021_paper_16-1.pdf},
year = {2021},
date = {2021-01-01},
booktitle = {Proceedings of the 2021 Truth and Trust Online Conference (TTO 2021), Virtual, October 7-8, 2021},
pages = {18--27},
publisher = {Hacks/Hackers},
abstract = {Automated fact-checking (AFC) systems exist to combat disinformation; however, their complexity makes them opaque to the end user, making it difficult to foster trust. In this paper, we introduce the E-BART model with the hope of making progress on this front. E-BART is able to provide a veracity prediction for a claim, and jointly generate a human-readable explanation for this decision. We show that E-BART is competitive with the state-of-the-art on the e-FEVER and e-SNLI tasks. In addition, we validate the joint-prediction architecture by showing 1) that generating explanations does not significantly impede the model from performing well in its main task of veracity prediction, and 2) that predicted veracity and explanations are more internally coherent when generated jointly than separately. Finally, we also conduct human evaluations on the impact of generated explanations and observe that explanations increase human ability to spot misinformation and make people more skeptical about claims.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
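A heavily simplified sketch of the joint predict-and-explain idea: a single shared encoder feeds both a veracity classifier and a token-level decoder for the explanation. All layer choices, sizes, and names below are assumptions; the actual model builds on BART, as the paper describes.

import torch
import torch.nn as nn

class JointVeracityExplainer(nn.Module):
    def __init__(self, vocab_size=1000, hidden=128, num_labels=3):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, hidden)
        self.encoder = nn.GRU(hidden, hidden, batch_first=True)  # toy stand-in encoder
        self.veracity_head = nn.Linear(hidden, num_labels)       # e.g. true/false/mixed
        self.decoder = nn.Linear(hidden, vocab_size)             # toy explanation "decoder"

    def forward(self, claim_tokens):
        states, last = self.encoder(self.embed(claim_tokens))
        veracity = self.veracity_head(last[-1])     # one label per claim
        explanation_logits = self.decoder(states)   # one token distribution per position
        return veracity, explanation_logits

model = JointVeracityExplainer()
claims = torch.randint(0, 1000, (2, 12))            # batch of 2 tokenized claims
veracity, explanation = model(claims)
print(veracity.shape, explanation.shape)            # (2, 3) and (2, 12, 1000)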
Roitero, Kevin; Soprano, Michael; Portelli, Beatrice; Luise, Massimiliano De; Spina, Damiano; Mea, Vincenzo Della; Serra, Giuseppe; Mizzaro, Stefano; Demartini, Gianluca
Can The Crowd Judge Truthfulness? A Longitudinal Study on Recent Misinformation About COVID-19 Journal Article
In: Personal and Ubiquitous Computing, 2021, ISSN: 1617-4917.
@article{journal-paper-puc-2021,
title = {Can The Crowd Judge Truthfulness? A Longitudinal Study on Recent Misinformation About COVID-19},
author = {Kevin Roitero and Michael Soprano and Beatrice Portelli and Massimiliano De Luise and Damiano Spina and Vincenzo Della Mea and Giuseppe Serra and Stefano Mizzaro and Gianluca Demartini},
url = {https://doi.org/10.1007/s00779-021-01604-6},
doi = {10.1007/s00779-021-01604-6},
issn = {1617-4917},
year = {2021},
date = {2021-01-01},
journal = {Personal and Ubiquitous Computing},
abstract = {Recently, the misinformation problem has been addressed with a crowdsourcing-based approach: to assess the truthfulness of a statement, instead of relying on a few experts, a crowd of non-experts is exploited. We study whether crowdsourcing is an effective and reliable method to assess truthfulness during a pandemic, targeting statements related to COVID-19, thus addressing (mis)information that is both related to a sensitive and personal issue and very recent as compared to when the judgment is done. In our experiments, crowd workers are asked to assess the truthfulness of statements, and to provide evidence for the assessments. Besides showing that the crowd is able to accurately judge the truthfulness of the statements, we report results on workers' behavior, agreement among workers, and the effects of aggregation functions, scale transformations, and workers' background and bias. We perform a longitudinal study by re-launching the task multiple times with both novice and experienced workers, deriving important insights on how the behavior and quality change over time. Our results show that workers are able to detect and objectively categorize online (mis)information related to COVID-19; both crowdsourced and expert judgments can be transformed and aggregated to improve quality; worker background and other signals (e.g., source of information, behavior) impact the quality of the data. The longitudinal study demonstrates that the time-span has a major effect on the quality of the judgments, for both novice and experienced workers. Finally, we provide an extensive failure analysis of the statements misjudged by the crowd workers.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
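As a toy illustration of two steps the study analyzes, the sketch below aggregates repeated crowd judgments per statement (mean and majority vote) and coarsens a fine-grained scale into three levels; the scale, the bins, and the data are assumptions for illustration only.

from statistics import mean, mode

judgments = {  # statement id -> crowd judgments on an assumed 6-level scale (0-5)
    "s1": [5, 4, 5, 3, 5],
    "s2": [1, 0, 2, 1, 1],
}

def to_three_levels(score):
    # Hypothetical binning: 0-1 false, 2-3 in-between, 4-5 true.
    return 0 if score <= 1 else (1 if score <= 3 else 2)

for sid, js in judgments.items():
    # Two aggregation functions (mean, majority vote) plus a scale transformation.
    print(sid, round(mean(js), 2), mode(js), to_three_levels(round(mean(js))))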
Soprano, Michael; Roitero, Kevin; Barbera, David La; Ceolin, Davide; Spina, Damiano; Mizzaro, Stefano; Demartini, Gianluca
The Many Dimensions of Truthfulness: Crowdsourcing Misinformation Assessments on a Multidimensional Scale Journal Article
In: Information Processing & Management, vol. 58, no 6, pp. 102710, 2021, ISSN: 0306-4573.
@article{journal-paper-ipm-2021,
title = {The Many Dimensions of Truthfulness: Crowdsourcing Misinformation Assessments on a Multidimensional Scale},
author = {Michael Soprano and Kevin Roitero and David La Barbera and Davide Ceolin and Damiano Spina and Stefano Mizzaro and Gianluca Demartini},
url = {https://www.sciencedirect.com/science/article/pii/S0306457321001941},
doi = {10.1016/j.ipm.2021.102710},
issn = {0306-4573},
year = {2021},
date = {2021-01-01},
journal = {Information Processing & Management},
volume = {58},
number = {6},
pages = {102710},
abstract = {Recent work has demonstrated the viability of using crowdsourcing as a tool for evaluating the truthfulness of public statements. Under certain conditions such as: (1) having a balanced set of workers with different backgrounds and cognitive abilities; (2) using an adequate set of mechanisms to control the quality of the collected data; and (3) using a coarse grained assessment scale, the crowd can provide reliable identification of fake news. However, fake news are a subtle matter: statements can be just biased (“cherrypicked”), imprecise, wrong, etc. and the unidimensional truth scale used in existing work cannot account for such differences. In this paper we propose a multidimensional notion of truthfulness and we ask the crowd workers to assess seven different dimensions of truthfulness selected based on existing literature: Correctness, Neutrality, Comprehensibility, Precision, Completeness, Speaker’s Trustworthiness, and Informativeness. We deploy a set of quality control mechanisms to ensure that the thousands of assessments collected on 180 publicly available fact-checked statements distributed over two datasets are of adequate quality, including a custom search engine used by the crowd workers to find web pages supporting their truthfulness assessments. A comprehensive analysis of crowdsourced judgments shows that: (1) the crowdsourced assessments are reliable when compared to an expert-provided gold standard; (2) the proposed dimensions of truthfulness capture independent pieces of information; (3) the crowdsourcing task can be easily learned by the workers; and (4) the resulting assessments provide a useful basis for a more complete estimation of statement truthfulness.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
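One of the abstract's findings is that the seven dimensions capture independent pieces of information; a minimal way to probe that is the pairwise correlation between per-statement dimension scores, as in this toy check (data and scale are assumptions).

from statistics import correlation  # available in Python 3.10+

correctness = [2, -1, 1, 0, 2, -2]  # toy per-statement scores on an assumed [-2, 2] scale
neutrality = [0, -2, 2, 1, -1, 0]
print(round(correlation(correctness, neutrality), 3))  # values near 0 suggest largely independent dimensions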
Ceolin, Davide; Primiero, Giuseppe; Wielemaker, Jan; Soprano, Michael
Assessing the Quality of Online Reviews Using Formal Argumentation Theory Proceedings Article
In: Brambilla, Marco; Chbeir, Richard; Frasincar, Flavius; Manolescu, Ioana (Ed.): Web Engineering, pp. 71–87, Springer International Publishing, Cham, 2021, ISBN: 978-3-030-74296-6.
@inproceedings{10.1007/978-3-030-74296-6_6,
title = {Assessing the Quality of Online Reviews Using Formal Argumentation Theory},
author = {Davide Ceolin and Giuseppe Primiero and Jan Wielemaker and Michael Soprano},
editor = {Marco Brambilla and Richard Chbeir and Flavius Frasincar and Ioana Manolescu},
doi = {10.1007/978-3-030-74296-6_6},
isbn = {978-3-030-74296-6},
year = {2021},
date = {2021-01-01},
booktitle = {Web Engineering},
pages = {71--87},
publisher = {Springer International Publishing},
address = {Cham},
abstract = {Review scores collect users' opinions in a simple and intuitive manner. However, review scores are also easily manipulable, hence they are often accompanied by explanations. A substantial amount of research has been devoted to ascertaining the quality of reviews, to identify the most useful and authentic scores through explanation analysis. In this paper, we advance the state of the art in review quality analysis. We introduce a rating system to identify review arguments and to define an appropriate weighted semantics through formal argumentation theory. We introduce an algorithm to construct a corresponding graph, based on a selection of weighted arguments, their semantic similarity, and the supported ratings. We provide an algorithm to identify the model of such an argumentation graph, maximizing the overall weight of the admitted nodes and edges. We evaluate these contributions on the Amazon review dataset by McAuley et al. [15], by comparing the results of our argumentation assessment with the upvotes received by the reviews. Also, we deepen the evaluation by crowdsourcing a multidimensional assessment of reviews and comparing it to the argumentation assessment. Lastly, we perform a user study to evaluate the explainability of our method. Our method achieves two goals: (1) it identifies reviews that are considered useful, comprehensible, truthful by online users and does so in an unsupervised manner, and (2) it provides an explanation of quality assessments.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Qu, Yunke; Roitero, Kevin; Mizzaro, Stefano; Spina, Damiano; Demartini, Gianluca
Human-in-the-Loop Systems for Truthfulness: A Study of Human and Machine Confidence Proceedings Article
In: Augenstein, Isabelle; Papotti, Paolo; Wright, Dustin (Ed.): Proceedings of the 2021 Truth and Trust Online Conference (TTO 2021), Virtual, October 7-8, 2021, pp. 40–49, Hacks/Hackers, 2021.
@inproceedings{DBLP:conf/tto/QuRMSD21,
title = {Human-in-the-Loop Systems for Truthfulness: A Study of Human and Machine Confidence},
author = {Yunke Qu and Kevin Roitero and Stefano Mizzaro and Damiano Spina and Gianluca Demartini},
editor = {Isabelle Augenstein and Paolo Papotti and Dustin Wright},
url = {https://truthandtrustonline.com/wp-content/uploads/2021/10/TTO2021_paper_29.pdf},
year = {2021},
date = {2021-01-01},
booktitle = {Proceedings of the 2021 Truth and Trust Online Conference (TTO 2021), Virtual, October 7-8, 2021},
pages = {40--49},
publisher = {Hacks/Hackers},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Portelli, Beatrice; Popescu, Mihai Horia; Mea, Vincenzo Della
DiLBERT: Cheap Embeddings for Disease Related Medical NLP Journal Article
In: IEEE Access, vol. 9, pp. 159714–159723, 2021.
@article{9628010,
title = {DiLBERT: Cheap Embeddings for Disease Related Medical NLP},
author = {Kevin Roitero and Beatrice Portelli and Mihai Horia Popescu and Vincenzo Della Mea},
doi = {10.1109/ACCESS.2021.3131386},
year = {2021},
date = {2021-01-01},
journal = {IEEE Access},
volume = {9},
pages = {159714-159723},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Demartini, Gianluca; Roitero, Kevin; Mizzaro, Stefano
Managing Bias in Human-Annotated Data: Moving Beyond Bias Removal Journal Article
In: CoRR, vol. abs/2110.13504, 2021.
@article{DBLP:journals/corr/abs-2110-13504,
title = {Managing Bias in Human-Annotated Data: Moving Beyond Bias Removal},
author = {Gianluca Demartini and Kevin Roitero and Stefano Mizzaro},
url = {https://arxiv.org/abs/2110.13504},
year = {2021},
date = {2021-01-01},
journal = {CoRR},
volume = {abs/2110.13504},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Conde-Sousa, Eduardo; Vale, João; Feng, Ming; Xu, Kele; Wang, Yin; Mea, Vincenzo Della; Barbera, David La; Montahaei, Ehsan; Baghshah, Mahdieh Soleymani; Turzynski, Andreas; Gildenblat, Jacob; Klaiman, Eldad; Hong, Yiyu; Aresta, Guilherme; Araújo, Teresa; Aguiar, Paulo; Eloy, Catarina; Polónia, António
HEROHE Challenge: assessing HER2 status in breast cancer without immunohistochemistry or in situ hybridization Miscellaneous
2021.
@misc{https://doi.org/10.48550/arxiv.2111.04738,
title = {HEROHE Challenge: assessing HER2 status in breast cancer without immunohistochemistry or in situ hybridization},
author = {Eduardo Conde-Sousa and João Vale and Ming Feng and Kele Xu and Yin Wang and Vincenzo Della Mea and David La Barbera and Ehsan Montahaei and Mahdieh Soleymani Baghshah and Andreas Turzynski and Jacob Gildenblat and Eldad Klaiman and Yiyu Hong and Guilherme Aresta and Teresa Araújo and Paulo Aguiar and Catarina Eloy and António Polónia},
url = {https://arxiv.org/abs/2111.04738},
doi = {10.48550/ARXIV.2111.04738},
year = {2021},
date = {2021-01-01},
publisher = {arXiv},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Barbera, David La; Roitero, Kevin; Mizzaro, Stefano; Mea, Vincenzo Della; Valent, Francesca
A Software Simulator for Optimizing Ambulance Location and Response Time: A Preliminary Report Proceedings Article
In: 2021 IEEE International Conference on Digital Health (ICDH), pp. 209–211, 2021.
@inproceedings{9581242,
title = {A Software Simulator for Optimizing Ambulance Location and Response Time: A Preliminary Report},
author = {David La Barbera and Kevin Roitero and Stefano Mizzaro and Vincenzo Della Mea and Francesca Valent},
doi = {10.1109/ICDH52753.2021.00037},
year = {2021},
date = {2021-01-01},
booktitle = {2021 IEEE International Conference on Digital Health (ICDH)},
pages = {209-211},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2020
Barbera, David La; Polónia, António; Roitero, Kevin; Conde-Sousa, Eduardo; Mea, Vincenzo Della
Detection of HER2 from Haematoxylin-Eosin Slides Through a Cascade of Deep Learning Classifiers via Multi-Instance Learning Journal Article
In: Journal of Imaging, vol. 6, no 9, 2020, ISSN: 2313-433X.
@article{labarberaher2,
title = {Detection of HER2 from Haematoxylin-Eosin Slides Through a Cascade of Deep Learning Classifiers via Multi-Instance Learning},
author = {David La Barbera and António Polónia and Kevin Roitero and Eduardo Conde-Sousa and Vincenzo Della Mea},
url = {https://www.mdpi.com/2313-433X/6/9/82},
doi = {10.3390/jimaging6090082},
issn = {2313-433X},
year = {2020},
date = {2020-08-23},
urldate = {2020-08-23},
journal = {Journal of Imaging},
volume = {6},
number = {9},
abstract = {Breast cancer is the most frequently diagnosed cancer in women. The correct identification of the HER2 receptor is a matter of major importance when dealing with breast cancer: an over-expression of HER2 is associated with aggressive clinical behaviour; moreover, HER2 targeted therapy results in a significant improvement in the overall survival rate. In this work, we employ a pipeline based on a cascade of deep neural network classifiers and multi-instance learning to detect the presence of HER2 from Haematoxylin-Eosin slides, which partly mimics the pathologist’s behaviour by first recognizing cancer and then evaluating HER2. Our results show that the proposed system presents a good overall effectiveness. Furthermore, the system design is open to further improvements that can be easily deployed in order to increase the effectiveness score.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
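A minimal sketch of the cascade-plus-multi-instance idea from the abstract: a first-stage classifier filters slide tiles for tumour tissue, a second stage scores HER2 only on the surviving tiles, and the slide-level label aggregates over tiles. The stub classifiers, field names, and max-aggregation are assumptions for illustration.

def cancer_prob(tile):    # stage 1 stub: probability that a tile contains tumour
    return tile["tumour_score"]

def her2_prob(tile):      # stage 2 stub: probability of HER2 over-expression
    return tile["her2_score"]

def slide_her2(tiles, tumour_threshold=0.5):
    tumour_tiles = [t for t in tiles if cancer_prob(t) >= tumour_threshold]
    if not tumour_tiles:
        return 0.0
    # Multi-instance aggregation: a slide is as positive as its strongest tile.
    return max(her2_prob(t) for t in tumour_tiles)

tiles = [{"tumour_score": 0.9, "her2_score": 0.8},
         {"tumour_score": 0.2, "her2_score": 0.9},  # ignored: not recognized as tumour
         {"tumour_score": 0.7, "her2_score": 0.3}]
print(slide_her2(tiles))  # 0.8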
Roitero, Kevin; Soprano, Michael; Fan, Shaoyang; Spina, Damiano; Mizzaro, Stefano; Demartini, Gianluca
Can The Crowd Identify Misinformation Objectively? The Effects of Judgment Scale and Assessor's Background Proceedings Article
In: Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 439–448, Association for Computing Machinery, Virtual Event, China, 2020, ISBN: 9781450380164.
@inproceedings{10.1145/3397271.3401112,
title = {Can The Crowd Identify Misinformation Objectively? The Effects of Judgment Scale and Assessor's Background},
author = {Kevin Roitero and Michael Soprano and Shaoyang Fan and Damiano Spina and Stefano Mizzaro and Gianluca Demartini},
url = {https://doi.org/10.1145/3397271.3401112},
doi = {10.1145/3397271.3401112},
isbn = {9781450380164},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval},
pages = {439–448},
publisher = {Association for Computing Machinery},
address = {Virtual Event, China},
series = {SIGIR '20},
note = {Conference Rank: GGS A++, Core A*},
abstract = {Truthfulness judgments are a fundamental step in the process of fighting misinformation, as they are crucial to train and evaluate classifiers that automatically distinguish true and false statements. Usually such judgments are made by experts, like journalists for political statements or medical doctors for medical statements. In this paper, we follow a different approach and rely on (non-expert) crowd workers. This of course leads to the following research question: Can crowdsourcing be reliably used to assess the truthfulness of information and to create large-scale labeled collections for information credibility systems? To address this issue, we present the results of an extensive study based on crowdsourcing: we collect thousands of truthfulness assessments over two datasets, and we compare expert judgments with crowd judgments, expressed on scales with various granularity levels. We also measure the political bias and the cognitive background of the workers, and quantify their effect on the reliability of the data provided by the crowd.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Culpepper, Shane J; Sanderson, Mark; Scholer, Falk; Mizzaro, Stefano
Fewer topics? A million topics? Both?! On topics subsets in test collections Journal Article
In: Inf. Retr. J., vol. 23, no 1, pp. 49–85, 2020.
@article{DBLP:journals/ir/RoiteroCSSM20,
title = {Fewer topics? A million topics? Both?! On topics subsets in test collections},
author = {Kevin Roitero and Shane J Culpepper and Mark Sanderson and Falk Scholer and Stefano Mizzaro},
url = {https://doi.org/10.1007/s10791-019-09357-w},
doi = {10.1007/s10791-019-09357-w},
year = {2020},
date = {2020-01-01},
journal = {Inf. Retr. J.},
volume = {23},
number = {1},
pages = {49--85},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Han, Lei; Maddalena, Eddy; Checco, Alessandro; Sarasua, Cristina; Gadiraju, Ujwal; Roitero, Kevin; Demartini, Gianluca
Crowd Worker Strategies in Relevance Judgment Tasks Proceedings Article
In: Proceedings of the 13th International Conference on Web Search and Data Mining, pp. 241–249, Association for Computing Machinery, Houston, TX, USA, 2020, ISBN: 9781450368223.
@inproceedings{10.1145/3336191.3371857,
title = {Crowd Worker Strategies in Relevance Judgment Tasks},
author = {Lei Han and Eddy Maddalena and Alessandro Checco and Cristina Sarasua and Ujwal Gadiraju and Kevin Roitero and Gianluca Demartini},
url = {https://doi.org/10.1145/3336191.3371857},
doi = {10.1145/3336191.3371857},
isbn = {9781450368223},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 13th International Conference on Web Search and Data Mining},
pages = {241–249},
publisher = {Association for Computing Machinery},
address = {Houston, TX, USA},
series = {WSDM '20},
abstract = {Crowdsourcing is a popular technique to collect large amounts of human-generated labels, such as relevance judgments used to create information retrieval (IR) evaluation collections. Previous research has shown how collecting high quality labels from a crowdsourcing platform can be challenging. Existing quality assurance techniques focus on answer aggregation or on the use of gold questions where ground-truth data allows checking the quality of the responses. In this paper, we present qualitative and quantitative results, revealing how different crowd workers adopt different work strategies to complete relevance judgment tasks efficiently and their consequent impact on quality. We delve into the techniques and tools that highly experienced crowd workers use to be more efficient in completing crowdsourcing micro-tasks. To this end, we use both qualitative results from worker interviews and surveys, as well as the results of a data-driven study of behavioral log data (i.e., clicks, keystrokes and keyboard shortcuts) collected from crowd workers performing relevance judgment tasks. Our results highlight the presence of frequently used shortcut patterns that can speed up task completion, thus increasing the hourly wage of efficient workers. We observe how crowd work experiences result in different types of working strategies, productivity levels, quality and diversity of the crowdsourced judgments.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Brunello, Andrea; Serra, Giuseppe; Mizzaro, Stefano
Effectiveness evaluation without human relevance judgments: A systematic analysis of existing methods and of their combinations Journal Article
In: Information Processing & Management, vol. 57, no 2, pp. 102149, 2020, ISSN: 0306-4573.
@article{ROITERO2020102149,
title = {Effectiveness evaluation without human relevance judgments: A systematic analysis of existing methods and of their combinations},
author = {Kevin Roitero and Andrea Brunello and Giuseppe Serra and Stefano Mizzaro},
url = {http://www.sciencedirect.com/science/article/pii/S030645731930192X},
doi = {10.1016/j.ipm.2019.102149},
issn = {0306-4573},
year = {2020},
date = {2020-01-01},
journal = {Information Processing & Management},
volume = {57},
number = {2},
pages = {102149},
abstract = {In test collection based evaluation of retrieval effectiveness, it has been suggested to completely avoid using human relevance judgments. Although several methods have been proposed, their accuracy is still limited. In this paper we present two overall contributions. First, we provide a systematic comparison of all the most widely adopted previous approaches on a large set of 14 TREC collections. We aim at analyzing the methods in a homogeneous and complete way, in terms of the accuracy measures used as well as in terms of the datasets selected, showing that considerably different results may be achieved considering different methods, datasets, and measures. Second, we study the combination of such methods, which, to the best of our knowledge, has not been investigated so far. Our experimental results show that simple combination strategies based on data fusion techniques are usually not effective and even harmful. However, some more sophisticated solutions, based on machine learning, are indeed effective and often outperform all individual methods. Moreover, they are more stable, as they show a smaller variation across datasets. Our results have the practical implication that, when trying to automatically evaluate retrieval effectiveness, researchers should not use a single method, but a (machine-learning based) combination of them.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Roitero, Kevin; Soprano, Michael; Portelli, Beatrice; Spina, Damiano; Mea, Vincenzo Della; Serra, Giuseppe; Mizzaro, Stefano; Demartini, Gianluca
The COVID-19 Infodemic: Can the Crowd Judge Recent Misinformation Objectively? Proceedings Article
In: Proceedings of the 29th ACM International Conference on Information and Knowledge Management (CIKM 2020), Galway, Ireland (Online), October 19-23, 2020, pp. 1305–1314, Association for Computing Machinery, 2020, ISBN: 9781450368599. Conference Rank: GGS A+, Core A.
@inproceedings{conference-paper-cikm2020,
title = {The COVID-19 Infodemic: Can the Crowd Judge Recent Misinformation Objectively?},
author = {Kevin Roitero and Michael Soprano and Beatrice Portelli and Damiano Spina and Vincenzo Della Mea and Giuseppe Serra and Stefano Mizzaro and Gianluca Demartini},
url = {https://doi.org/10.1145/3340531.3412048},
doi = {10.1145/3340531.3412048},
isbn = {9781450368599},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 29th ACM International Conference on Information and Knowledge Management (CIKM 2020)},
note = {Galway, Ireland (Online), October 19-23, 2020. Conference Rank: GGS A+, Core A},
pages = {1305–1314},
publisher = {Association for Computing Machinery},
address = {Virtual Event, Ireland},
series = {CIKM '20},
abstract = {Misinformation is an ever-increasing problem that is difficult to solve for the research community and has a negative impact on society at large. Very recently, the problem has been addressed with a crowdsourcing-based approach to scale up labeling efforts: to assess the truthfulness of a statement, instead of relying on a few experts, a crowd of (non-expert) judges is exploited. We follow the same approach to study whether crowdsourcing is an effective and reliable method to assess statements' truthfulness during a pandemic. We specifically target statements related to the COVID-19 health emergency, which is still ongoing at the time of the study and has arguably caused an increase of the amount of misinformation that is spreading online (a phenomenon for which the term "infodemic" has been used). By doing so, we are able to address (mis)information that is both related to a sensitive and personal issue like health and very recent as compared to when the judgment is done: two issues that have not been analyzed in related work. In our experiment, crowd workers are asked to assess the truthfulness of statements, as well as to provide evidence for the assessments as a URL and a text justification. Besides showing that the crowd is able to accurately judge the truthfulness of the statements, we also report results on many different aspects, including: agreement among workers, the effect of different aggregation functions, of scales transformations, and of workers' background / bias. We also analyze workers' behavior, in terms of queries submitted, URLs found / selected, text justifications, and other behavioral data like clicks and mouse actions collected by means of an ad hoc logger.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Barbera, David La; Roitero, Kevin; Demartini, Gianluca; Mizzaro, Stefano; Spina, Damiano
Crowdsourcing Truthfulness: The Impact of Judgment Scale and Assessor Bias Proceedings Article
In: Jose, Joemon M.; Yilmaz, Emine; Magalhães, João; Castells, Pablo; Ferro, Nicola; Silva, Mário J.; Martins, Flávio (Ed.): Advances in Information Retrieval - 42nd European Conference on IR Research, ECIR 2020, Lisbon, Portugal, April 14-17, 2020, Proceedings, Part II, pp. 207–214, Springer, 2020.
@inproceedings{DBLP:conf/ecir/BarberaRDMS20,
title = {Crowdsourcing Truthfulness: The Impact of Judgment Scale and Assessor Bias},
author = {David La Barbera and Kevin Roitero and Gianluca Demartini and Stefano Mizzaro and Damiano Spina},
editor = {Joemon M. Jose and Emine Yilmaz and João Magalhães and Pablo Castells and Nicola Ferro and Mário J. Silva and Flávio Martins},
url = {https://doi.org/10.1007/978-3-030-45442-5_26},
doi = {10.1007/978-3-030-45442-5_26},
year = {2020},
date = {2020-01-01},
booktitle = {Advances in Information Retrieval - 42nd European Conference on IR Research, ECIR 2020, Lisbon, Portugal, April 14-17, 2020, Proceedings, Part II},
volume = {12036},
pages = {207--214},
publisher = {Springer},
series = {Lecture Notes in Computer Science},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Demartini, Gianluca; Mizzaro, Stefano; Spina, Damiano
Human-in-the-loop Artificial Intelligence for Fighting Online Misinformation: Challenges and Opportunities Journal Article
In: IEEE Data Eng. Bull., vol. 43, no 3, pp. 65–74, 2020.
@article{DBLP:journals/debu/DemartiniMS20,
title = {Human-in-the-loop Artificial Intelligence for Fighting Online Misinformation: Challenges and Opportunities},
author = {Gianluca Demartini and Stefano Mizzaro and Damiano Spina},
url = {http://sites.computer.org/debull/A20sept/p65.pdf},
year = {2020},
date = {2020-01-01},
journal = {IEEE Data Eng. Bull.},
volume = {43},
number = {3},
pages = {65--74},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Roitero, Kevin; Carterette, Ben; Mehrotra, Rishabh; Lalmas, Mounia
Leveraging Behavioral Heterogeneity Across Markets for Cross-Market Training of Recommender Systems Book Chapter
In: Companion Proceedings of the Web Conference 2020, pp. 694–702, Association for Computing Machinery, New York, NY, USA, 2020, ISBN: 9781450370240.
@inbook{10.1145/3366424.3384362,
title = {Leveraging Behavioral Heterogeneity Across Markets for Cross-Market Training of Recommender Systems},
author = {Kevin Roitero and Ben Carterette and Rishabh Mehrotra and Mounia Lalmas},
url = {https://doi.org/10.1145/3366424.3384362},
isbn = {9781450370240},
year = {2020},
date = {2020-01-01},
booktitle = {Companion Proceedings of the Web Conference 2020},
pages = {694–702},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {Modern recommender systems are optimised to deliver personalised recommendations to millions of users spread across different geographic regions exhibiting various forms of heterogeneity, including behavioural-, content- and trend specific heterogeneity. System designers often face the challenge of deploying either a single global model across all markets, or developing custom models for different markets. In this work, we focus on the specific case of music recommendation across 21 different markets, and consider the trade-off between developing global model versus market specific models. We begin by investigating behavioural differences across users of different markets, and motivate the need for considering market as an important factor when training models. We propose five different training styles, covering the entire spectrum of models: from a single global model to individual market specific models, and in the process, propose ways to identify and leverage users abroad, and data from similar markets. Based on a large scale experimentation with data for 100M users across 21 different markets, we present insights which highlight that markets play a key role, and describe models that leverage market specific data in serving personalised recommendations.},
keywords = {},
pubstate = {published},
tppubtype = {inbook}
}
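As a rough illustration of the spectrum of training styles discussed in the abstract, the sketch below shows only its two endpoints, a single global training set versus one per market; the paper proposes five styles in total, and the grouping logic here is an assumption.

from collections import defaultdict

def training_sets(interactions, style):
    # interactions: list of (market, user, item) tuples.
    if style == "global":          # one model trained on every market
        return {"global": interactions}
    if style == "per_market":      # one model per market
        buckets = defaultdict(list)
        for market, user, item in interactions:
            buckets[market].append((market, user, item))
        return dict(buckets)
    raise ValueError(f"unknown style: {style}")

logs = [("SE", "u1", "trackA"), ("BR", "u2", "trackB"), ("SE", "u3", "trackC")]
print({k: len(v) for k, v in training_sets(logs, "per_market").items()})  # {'SE': 2, 'BR': 1}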
Roitero, Kevin; Bozzato, Cristian; Mea, Vincenzo Della; Mizzaro, Stefano; Serra, Giuseppe
Twitter goes to the Doctor: Detecting Medical Tweets using Machine Learning and BERT. Proceedings Article
In: SIIRH@ECIR, 2020.
@inproceedings{roitero2020twitter,
title = {Twitter goes to the Doctor: Detecting Medical Tweets using Machine Learning and BERT.},
author = {Kevin Roitero and Cristian Bozzato and Vincenzo Della Mea and Stefano Mizzaro and Giuseppe Serra},
year = {2020},
date = {2020-01-01},
booktitle = {SIIRH@ECIR},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}