A (non-exhaustive) list of publications produced by members of the laboratory
2025
Soprano, Michael; Modha, Sandip; Roitero, Kevin; Maddalena, Eddy; Viviani, Marco; Pasi, Gabriella; Mizzaro, Stefano
AIDME: A Scalable, Interpretable Framework for AI-Aided Scoping Reviews Proceedings Article
In: Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR), pp. 194–207, Association for Computing Machinery, Padua, Italy, 2025, ISBN: 9798400718618.
@inproceedings{10.1145/3731120.3744586,
title = {AIDME: A Scalable, Interpretable Framework for AI-Aided Scoping Reviews},
author = {Michael Soprano and Sandip Modha and Kevin Roitero and Eddy Maddalena and Marco Viviani and Gabriella Pasi and Stefano Mizzaro},
url = {https://doi.org/10.1145/3731120.3744586},
doi = {10.1145/3731120.3744586},
isbn = {9798400718618},
year = {2025},
date = {2025-07-18},
urldate = {2025-01-01},
booktitle = {Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR)},
pages = {194–207},
publisher = {Association for Computing Machinery},
address = {Padua, Italy},
series = {ICTIR '25},
abstract = {Scientific publishing is expanding rapidly across disciplines, making it increasingly difficult for researchers to organize, filter, and synthesize the literature. Systematic reviews address this challenge through structured analysis, but the early stages, particularly the screening phase, can become overwhelming when faced with thousands of records. Scoping reviews are often used as a preparatory step to explore and structure the literature before applying stricter protocols such as the PRISMA 2020 guidelines. In this work, we introduce AIDME (AI-Aided Document Mapping and Evaluation), a general-purpose framework that leverages Large Language Models (LLMs), topic modeling, thematic labeling, and citation network analysis to support the creation of scoping reviews in research areas with high publication volume. AIDME enables scalable filtering, clustering, labeling, and prioritization of publications while preserving human oversight. We evaluate the proposed framework through a case study on methods for assessing truthfulness in fact-checking, a fast-evolving field characterized by inconsistent terminology and fragmented methodologies. Our results show that AIDME reduces manual effort and produces structured outputs that facilitate subsequent PRISMA-compliant systematic reviews.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
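The pipeline stage this framework automates can be pictured with a small sketch: clustering record abstracts so they can be thematically labeled and prioritized. The choice of scikit-learn TF-IDF plus k-means below is our assumption for illustration; the paper itself combines LLMs, topic modeling, thematic labeling, and citation network analysis rather than this exact setup.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

# Hypothetical abstracts standing in for records retrieved in a scoping review.
abstracts = [
    "crowdsourced truthfulness assessment of political claims",
    "large language models for automated fact-checking",
    "citation network analysis of information retrieval papers",
    "graph-based exploration of citation networks",
]
X = TfidfVectorizer().fit_transform(abstracts)  # sparse document-term matrix
labels = KMeans(n_clusters=2, n_init=10, random_state=0).fit_predict(X)
print(labels)  # one cluster id per record, ready for thematic labeling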
Demartini, Gianluca; Hauff, Claudia; Lease, Matthew; Mizzaro, Stefano; Roitero, Kevin; Sanderson, Mark; Scholer, Falk; Shah, Chirag; Spina, Damiano; Thomas, Paul; Vries, Arjen P. de; Zuccon, Guido
Preaching to the ChoIR: Lessons IR Should Share with AI Proceedings Article
In: Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR), pp. 78–91, Association for Computing Machinery, Padua, Italy, 2025, ISBN: 9798400718618.
@inproceedings{10.1145/3731120.3744612,
title = {Preaching to the ChoIR: Lessons IR Should Share with AI},
author = {Gianluca Demartini and Claudia Hauff and Matthew Lease and Stefano Mizzaro and Kevin Roitero and Mark Sanderson and Falk Scholer and Chirag Shah and Damiano Spina and Paul Thomas and Arjen P. de Vries and Guido Zuccon},
url = {https://doi.org/10.1145/3731120.3744612},
doi = {10.1145/3731120.3744612},
isbn = {9798400718618},
year = {2025},
date = {2025-07-18},
urldate = {2025-01-01},
booktitle = {Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR)},
pages = {78–91},
publisher = {Association for Computing Machinery},
address = {Padua, Italy},
series = {ICTIR '25},
abstract = {The field of Information Retrieval (IR) changed profoundly at the end of the 1990s with the rise of Web Search, and there are parallels with developments in Artificial Intelligence (AI) happening today with the advent of ChatGPT, Large Language Models, and Generative AI. We acknowledge that there are clear differences between IR and AI. For example, IR is a much smaller field, and new problems arise, like data contamination that may affect benchmark-based evaluation of AI systems. But looking through the lens of an IR researcher, there are many striking similarities between the two fields of IR (25 years ago) and AI (today), and many topics appearing in discussions in AI resemble those of 25 years ago in IR: benchmark reliability and robust evaluation, reproducibility of results for non-public models, privacy and copyright issues, efficiency and scalability, etc. In this paper, we discuss similarities and differences between IR and AI and then derive some lessons learned in the field of IR as a list of recommendations - urging the IR community to reflect on, discuss, and convey these lessons to the AI field. We believe that a joint community effort by all IR researchers is both necessary and dutiful to obtain a fruitful discussion and research advancements with the AI community.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Barbera, David La; Lunardi, Riccardo; Zhuang, Mengdie; Roitero, Kevin
Impersonating the Crowd: Evaluating LLMs' Ability to Replicate Human Judgment in Misinformation Assessment Proceedings Article
In: Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR), pp. 12–21, Association for Computing Machinery, Padua, Italy, 2025, ISBN: 9798400718618.
@inproceedings{10.1145/3731120.3744581,
title = {Impersonating the Crowd: Evaluating LLMs' Ability to Replicate Human Judgment in Misinformation Assessment},
author = {David La Barbera and Riccardo Lunardi and Mengdie Zhuang and Kevin Roitero},
url = {https://doi.org/10.1145/3731120.3744581},
doi = {10.1145/3731120.3744581},
isbn = {9798400718618},
year = {2025},
date = {2025-07-18},
urldate = {2025-01-01},
booktitle = {Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR)},
pages = {12–21},
publisher = {Association for Computing Machinery},
address = {Padua, Italy},
series = {ICTIR '25},
abstract = {Large Language Models (LLMs) are increasingly used to replicate human decision-making in subjective tasks. In this work, we investigate whether LLMs can effectively impersonate real crowd workers when evaluating political misinformation statements. We assess (i) the agreement between LLM-generated assessments and human judgments and (ii) whether impersonation skews LLM assessments, impacting accuracy. Using publicly available misinformation assessment datasets, we prompt LLMs to impersonate real crowd workers based on their demographic profiles and evaluate them under the same statements. Through comparative analysis, we measure agreement rates and discrepancies in classification patterns. Our findings suggest that while some LLMs align moderately with crowd assessments, their impersonation ability remains inconsistent. Impersonation does not uniformly improve accuracy and often reinforces systematic biases, highlighting limitations in replicating human judgment.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
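The impersonation mechanism the abstract describes can be sketched as a prompt builder that prefixes a worker's demographic profile to the judgment request; the profile fields and wording below are hypothetical, not the paper's exact template.

# Minimal sketch: build an impersonation prompt from a (hypothetical) worker profile.
def impersonation_prompt(profile, statement):
    return (
        f"You are a {profile['age']}-year-old {profile['gender']} from "
        f"{profile['country']} with {profile['education']} education and "
        f"{profile['political_leaning']} political views.\n"
        f"Rate the truthfulness of the following statement on a 0-5 scale:\n"
        f'"{statement}"'
    )

print(impersonation_prompt(
    {"age": 34, "gender": "woman", "country": "the US",
     "education": "college", "political_leaning": "moderate"},
    "The unemployment rate doubled last year."))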
Soprano, Michael; Tapu, Denis Eduard; Barbera, David La; Roitero, Kevin; Mizzaro, Stefano
The Magnitude of Truth: On Using Magnitude Estimation for Truthfulness Assessment Proceedings Article
In: Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 446–456, Association for Computing Machinery, Padua, Italy, 2025, ISBN: 9798400715921.
@inproceedings{10.1145/3726302.3730091,
title = {The Magnitude of Truth: On Using Magnitude Estimation for Truthfulness Assessment},
author = {Michael Soprano and Denis Eduard Tapu and David La Barbera and Kevin Roitero and Stefano Mizzaro},
url = {https://doi.org/10.1145/3726302.3730091},
doi = {10.1145/3726302.3730091},
isbn = {9798400715921},
year = {2025},
date = {2025-07-13},
urldate = {2025-01-01},
booktitle = {Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval},
pages = {446–456},
publisher = {Association for Computing Machinery},
address = {Padua, Italy},
series = {SIGIR '25},
abstract = {Assessing the truthfulness of information is a critical task in fact-checking, and is typically performed using binary or coarse ordinal scales (2-6 levels), though fine-grained scales (e.g., 100 levels) have also been explored. Magnitude Estimation (ME) takes this approach further by allowing assessors to assign any value in the range (0, +∞). However, it introduces challenges, including the need for aggregation of assessments from individuals with different interpretations of the scale. Despite these challenges, its successful applications in other domains suggest its potential suitability for truthfulness assessment. We conduct a crowdsourcing study by collecting assessments on claims sourced from the PolitiFact fact-checking organization using ME. To the best of our knowledge, this is the first systematic investigation of ME in the context of truthfulness assessment. Our results show that while aggregation methods significantly impact assessment quality, optimal aggregation strategies yield accuracy and reliability comparable to traditional scales. More importantly, ME allows capturing subtle differences in truthfulness, offering richer insights than conventional coarse-grained scales.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
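To make the aggregation challenge concrete, here is a minimal sketch of one standard Magnitude Estimation normalization, log-transforming scores and centering them per worker before averaging per claim; this is an illustrative assumption, not necessarily the paper's best-performing strategy.

import math
from collections import defaultdict

def aggregate_me(assessments):
    """assessments: list of (worker_id, claim_id, score) with score > 0."""
    # Each worker uses the open-ended scale differently, so first compute
    # a per-worker mean of log-scores to remove individual scale effects.
    by_worker = defaultdict(list)
    for worker, _, score in assessments:
        by_worker[worker].append(math.log(score))
    worker_mean = {w: sum(v) / len(v) for w, v in by_worker.items()}

    # Center each judgment by its worker's mean, then average per claim.
    by_claim = defaultdict(list)
    for worker, claim, score in assessments:
        by_claim[claim].append(math.log(score) - worker_mean[worker])
    return {c: sum(v) / len(v) for c, v in by_claim.items()}

print(aggregate_me([("w1", "claim-1", 10), ("w1", "claim-2", 100),
                    ("w2", "claim-1", 2), ("w2", "claim-2", 8)]))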
Roitero, Kevin; Wright, Dustin; Soprano, Michael; Augenstein, Isabelle; Mizzaro, Stefano
Efficiency and Effectiveness of LLM-Based Summarization of Evidence in Crowdsourced Fact-Checking Proceedings Article
In: Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 457–467, Association for Computing Machinery, Padua, Italy, 2025, ISBN: 9798400715921.
@inproceedings{10.1145/3726302.3729960,
title = {Efficiency and Effectiveness of LLM-Based Summarization of Evidence in Crowdsourced Fact-Checking},
author = {Kevin Roitero and Dustin Wright and Michael Soprano and Isabelle Augenstein and Stefano Mizzaro},
url = {https://doi.org/10.1145/3726302.3729960},
doi = {10.1145/3726302.3729960},
isbn = {9798400715921},
year = {2025},
date = {2025-07-13},
urldate = {2025-01-01},
booktitle = {Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval},
pages = {457–467},
publisher = {Association for Computing Machinery},
address = {Padua, Italy},
series = {SIGIR '25},
abstract = {Evaluating the truthfulness of online content is critical for combating misinformation. This study examines the efficiency and effectiveness of crowdsourced truthfulness assessments through a comparative analysis of two approaches: one involving full-length webpages as evidence for each claim, and another using summaries for each evidence document generated with an LLM. Using an A/B testing setting, we engage a diverse pool of participants tasked with evaluating the truthfulness of statements under these conditions. Our analysis explores both the quality of assessments and the behavioral patterns of participants. The results reveal that relying on summarized evidence offers comparable accuracy and error metrics to the standard modality while significantly improving efficiency. Workers in the Summary setting complete a significantly higher number of assessments, reducing task duration and costs. Additionally, the Summary modality maximizes internal agreement and maintains consistent reliance on and perceived usefulness of evidence, demonstrating its potential to streamline large-scale truthfulness evaluations.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Lunardi, Riccardo; Soprano, Michael; Coppola, Paolo; Mea, Vincenzo Della; Mizzaro, Stefano; Roitero, Kevin
PILs of Knowledge: A Synthetic Benchmark for Evaluating Question Answering Systems in Healthcare Proceedings Article
In: Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 3648–3658, Association for Computing Machinery, Padua, Italy, 2025, ISBN: 9798400715921.
@inproceedings{10.1145/3726302.3730283,
title = {PILs of Knowledge: A Synthetic Benchmark for Evaluating Question Answering Systems in Healthcare},
author = {Riccardo Lunardi and Michael Soprano and Paolo Coppola and Vincenzo Della Mea and Stefano Mizzaro and Kevin Roitero},
url = {https://doi.org/10.1145/3726302.3730283},
doi = {10.1145/3726302.3730283},
isbn = {9798400715921},
year = {2025},
date = {2025-07-13},
urldate = {2025-01-01},
booktitle = {Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval},
pages = {3648–3658},
publisher = {Association for Computing Machinery},
address = {Padua, Italy},
series = {SIGIR '25},
abstract = {Patient Information Leaflets (PILs) provide essential information about medication usage, side effects, precautions, and interactions, making them a valuable resource for Question Answering (QA) systems in healthcare. However, no dedicated benchmark currently exists to evaluate QA systems specifically on PILs, limiting progress in this domain. To address this gap, we introduce a fact-supported synthetic benchmark composed of multiple-choice questions and answers generated from real PILs. We construct the benchmark using a fully automated pipeline that leverages multiple Large Language Models (LLMs) to generate diverse, realistic, and contextually relevant question-answer pairs. The benchmark is publicly released as a standardized evaluation framework for assessing the ability of LLMs to process and reason over PIL content. To validate its effectiveness, we conduct an initial evaluation with state-of-the-art LLMs, showing that the benchmark presents a realistic and challenging task, making it a valuable resource for advancing QA research in the healthcare domain.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
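The generation step the abstract describes can be sketched as a prompt that turns a leaflet passage into a fact-supported multiple-choice question; the template and output schema below are hypothetical.

# Minimal sketch: prompt an LLM to generate one MCQ from a PIL passage.
def mcq_prompt(leaflet_passage):
    return (
        "From the following patient information leaflet passage, write one "
        "multiple-choice question with four options and mark the correct one. "
        "Return JSON with keys 'question', 'options', 'answer', and "
        "'supporting_fact'.\n\n" + leaflet_passage
    )

print(mcq_prompt("Do not take more than 6 tablets in any 24-hour period."))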
Soprano, Michael; Maddalena, Eddy; Ros, Francesca Da; Zuliani, Maria Elena; Mizzaro, Stefano
Evaluation of Crowdsourced Peer Review using Synthetic Data and Simulations Proceedings Article
In: Cornia, Marcella; Nunzio, Giorgio Maria Di; Firmani, Donatella; Mizzaro, Stefano; Serra, Giuseppe; Tonelli, Sara; Tremamunno, Alessandro (Ed.): Proceedings of the 21st Conference on Information and Research Science Connecting to Digital and Library Science, CEUR-WS.org, Udine, Italy, 2025, ISSN: 1613-0073.
@inproceedings{soprano2025evaluation,
title = {Evaluation of Crowdsourced Peer Review using Synthetic Data and Simulations},
author = {Michael Soprano and Eddy Maddalena and Francesca Da Ros and Maria Elena Zuliani and Stefano Mizzaro},
editor = {Marcella Cornia and Giorgio Maria Di Nunzio and Donatella Firmani and Stefano Mizzaro and Giuseppe Serra and Sara Tonelli and Alessandro Tremamunno},
url = {https://ceur-ws.org/Vol-3937/paper8.pdf},
issn = {1613-0073},
year = {2025},
date = {2025-03-09},
urldate = {2025-01-01},
booktitle = {Proceedings of the 21st Conference on Information and Research Science Connecting to Digital and Library Science},
volume = {3937},
publisher = {CEUR-WS.org},
address = {Udine, Italy},
series = {CEUR Workshop Proceedings},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Maddalena, Eddy; Mizzaro, Stefano; Roitero, Kevin; Viviani, Marco; Barbera, David La; Modha, Sandip; Pasi, Gabriella; Ros, Francesca Da; Soprano, Michael
Report on the 14th Italian Information Retrieval Workshop (IIR 2024) Journal Article
In: SIGIR Forum, vol. 58, no 2, pp. 1–13, 2025, ISSN: 0163-5840.
@article{maddalena2024iir,
title = {Report on the 14th Italian Information Retrieval Workshop (IIR 2024)},
author = {Eddy Maddalena and Stefano Mizzaro and Kevin Roitero and Marco Viviani and David La Barbera and Sandip Modha and Gabriella Pasi and Francesca Da Ros and Michael Soprano},
url = {https://sigir.org/wp-content/uploads/2025/01/p15.pdf},
issn = {0163-5840},
year = {2025},
date = {2025-01-16},
urldate = {2024-01-01},
journal = {SIGIR Forum},
volume = {58},
number = {2},
pages = {1–13},
publisher = {Association for Computing Machinery (ACM)},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Spina, Damiano; Roitero, Kevin; Mizzaro, Stefano; Mea, Vincenzo Della; Ros, Francesca Da; Soprano, Michael; Akebli, Hafsa; Falcon, Alex; Fasihi, Mehdi; Fiorin, Alessio; Barbera, David La; Bosco, Daniele Lizzio; Lunardi, Riccardo; Marturano, Alberto; Muhammad, Zaka-Ud-Din; Nascimben, Francesco; Nottebaum, Moritz; Pascoli, Massimiliano; Popescu, Mihai Horia; Rasotto, Laura; Rehman, Mubashara; Taverna, Francesco; Tomasetig, Biagio; Tremamunno, Alessandro
Report on the Hands-On PhD Course on Responsible AI from the Lens of an Information Access Researcher Journal Article
In: SIGIR Forum, vol. 58, no 2, pp. 1–61, 2025, ISSN: 0163-5840.
@article{spina2024responsibleai,
title = {Report on the Hands-On PhD Course on Responsible AI from the Lens of an Information Access Researcher},
author = {Damiano Spina and Kevin Roitero and Stefano Mizzaro and Vincenzo Della Mea and Francesca Da Ros and Michael Soprano and Hafsa Akebli and Alex Falcon and Mehdi Fasihi and Alessio Fiorin and David La Barbera and Daniele Lizzio Bosco and Riccardo Lunardi and Alberto Marturano and Zaka-Ud-Din Muhammad and Francesco Nascimben and Moritz Nottebaum and Massimiliano Pascoli and Mihai Horia Popescu and Laura Rasotto and Mubashara Rehman and Francesco Taverna and Biagio Tomasetig and Alessandro Tremamunno},
url = {https://sigir.org/wp-content/uploads/2025/01/p07.pdf},
issn = {0163-5840},
year = {2025},
date = {2025-01-14},
urldate = {2024-01-16},
journal = {SIGIR Forum},
volume = {58},
number = {2},
pages = {1–61},
publisher = {Association for Computing Machinery (ACM)},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2024
Lunardi, Riccardo; Barbera, David La; Roitero, Kevin
The Elusiveness of Detecting Political Bias in Language Models Proceedings Article
In: Proceedings of the 33rd ACM International Conference on Information and Knowledge Management, pp. 3922–3926, Association for Computing Machinery, Boise, ID, USA, 2024, ISBN: 9798400704369.
@inproceedings{10.1145/3627673.3680002,
title = {The Elusiveness of Detecting Political Bias in Language Models},
author = {Riccardo Lunardi and David La Barbera and Kevin Roitero},
url = {https://doi.org/10.1145/3627673.3680002},
doi = {10.1145/3627673.3680002},
isbn = {9798400704369},
year = {2024},
date = {2024-10-21},
urldate = {2024-01-01},
booktitle = {Proceedings of the 33rd ACM International Conference on Information and Knowledge Management},
pages = {3922–3926},
publisher = {Association for Computing Machinery},
address = {Boise, ID, USA},
series = {CIKM '24},
abstract = {This study challenges the prevailing approach of measuring political leanings in Large Language Models (LLMs) through direct questioning. By extensively testing LLMs with original, positively and negatively paraphrased Political Compass questions we demonstrate that LLMs do not consistently reveal their political biases in response to standard questions. Our findings indicate that LLMs' political orientations are elusive, easily influenced by subtle changes in phrasing and context. This study underscores the limitations of direct questioning in accurately measuring the political biases of LLMs and emphasizes the necessity for more refined and effective approaches to understand their true political stances.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
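The testing protocol is straightforward to sketch: pose the same Political Compass item in original, positively paraphrased, and negatively paraphrased form, then compare the model's answers; the wordings below are illustrative, not the paper's actual items.

item = "The rich are too highly taxed."
variants = {
    "original": item,
    "positive paraphrase": "High earners already contribute more than their fair share in taxes.",
    "negative paraphrase": "It is not true that the rich are taxed too heavily.",
}
for name, text in variants.items():
    prompt = f'Do you agree or disagree with the following statement? "{text}"'
    # answer = query_llm(prompt)  # a stable political stance should answer all three coherently
    print(name, "->", prompt)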
Roitero, Kevin; Soprano, Michael; Barbera, David La; Maddalena, Eddy; Mizzaro, Stefano
Enhancing Fact-Checking: From Crowdsourced Validation to Integration with Large Language Models Proceedings Article
In: Mizzaro, Stefano; Maddalena, Eddy; Viviani, Marco; Roitero, Kevin (Ed.): Proceedings of the 14th Italian Information Retrieval Workshop, pp. 74–77, CEUR-WS.org, Udine, Italy, 2024.
@inproceedings{DBLP:conf/iir/Roitero24,
title = {Enhancing Fact-Checking: From Crowdsourced Validation to Integration with Large Language Models},
author = {Kevin Roitero and Michael Soprano and David La Barbera and Eddy Maddalena and Stefano Mizzaro},
editor = {Stefano Mizzaro and Eddy Maddalena and Marco Viviani and Kevin Roitero},
url = {https://ceur-ws.org/Vol-3802/paper13.pdf},
year = {2024},
date = {2024-10-16},
urldate = {2024-01-01},
booktitle = {Proceedings of the 14th Italian Information Retrieval Workshop},
volume = {3802},
pages = {74–77},
publisher = {CEUR-WS.org},
address = {Udine, Italy},
series = {CEUR Workshop Proceedings},
abstract = {Information retrieval effectiveness evaluation is often carried out by means of test collections. Many works investigated possible sources of bias in such an approach. We propose a systematic approach to identify bias and its causes, and to remove it, thus enforcing fairness in effectiveness evaluation by means of test collections.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Singh, Jaspreet; Soprano, Michael; Roitero, Kevin; Ceolin, Davide
Crowdsourcing Statement Classification to Enhance Information Quality Prediction Proceedings Article
In: Preuss, Mike; Leszkiewicz, Agata; Boucher, Jean-Christopher; Fridman, Ofer; Stampe, Lucas (Ed.): Proceedings of the 6th Multidisciplinary International Symposium on Disinformation in Open Online Media (MISDOOM 2024), pp. 70–85, Springer Nature Switzerland, Münster, Germany, 2024, ISBN: 978-3-031-71210-4.
@inproceedings{10.1007/978-3-031-71210-4_5,
title = {Crowdsourcing Statement Classification to Enhance Information Quality Prediction},
author = {Jaspreet Singh and Michael Soprano and Kevin Roitero and Davide Ceolin},
editor = {Mike Preuss and Agata Leszkiewicz and Jean-Christopher Boucher and Ofer Fridman and Lucas Stampe},
url = {https://link.springer.com/chapter/10.1007/978-3-031-71210-4_5},
doi = {10.1007/978-3-031-71210-4_5},
isbn = {978-3-031-71210-4},
year = {2024},
date = {2024-08-31},
urldate = {2024-01-01},
booktitle = {Proceedings of the 6th Multidisciplinary International Symposium on Disinformation in Open Online Media (MISDOOM 2024)},
pages = {70–85},
publisher = {Springer Nature Switzerland},
address = {Münster, Germany},
series = {Lecture Notes in Computer Science},
abstract = {This paper explores the use of crowdsourcing to classify statement types in film reviews to assess their information quality. Employing the Argument Type Identification Procedure which uses the Periodic Table of Arguments to categorize arguments, the study aims to connect statement types to the overall argument strength and information reliability. Focusing on non-expert annotators in a crowdsourcing environment, the research assesses their reliability based on various factors including language proficiency and annotation experience. Results indicate the importance of careful annotator selection and training to achieve high inter-annotator agreement and highlight challenges in crowdsourcing statement classification for information quality assessment.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
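Because the study hinges on inter-annotator agreement, a short sketch of how such agreement is typically quantified may help; using Cohen's kappa on two hypothetical annotators is our choice for illustration, not necessarily the coefficient used in the paper.

from sklearn.metrics import cohen_kappa_score

# Hypothetical statement-type labels from two crowd annotators.
annotator_a = ["fact", "opinion", "fact", "policy", "opinion", "fact"]
annotator_b = ["fact", "opinion", "policy", "policy", "opinion", "fact"]
print(cohen_kappa_score(annotator_a, annotator_b))  # 1.0 = perfect, 0 = chance level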
Soprano, Michael; Roitero, Kevin; Gadiraju, Ujwal; Maddalena, Eddy; Demartini, Gianluca
Longitudinal Loyalty: Understanding The Barriers To Running Longitudinal Studies On Crowdsourcing Platforms Journal Article
In: ACM Transactions on Social Computing, vol. 1, iss. 1, no 1, pp. 50, 2024, ISSN: 2469-7818.
@article{10.1145/3674884,
title = {Longitudinal Loyalty: Understanding The Barriers To Running Longitudinal Studies On Crowdsourcing Platforms},
author = {Michael Soprano and Kevin Roitero and Ujwal Gadiraju and Eddy Maddalena and Gianluca Demartini},
editor = {ACM},
url = {https://doi.org/10.1145/3674884},
doi = {10.1145/3674884},
issn = {2469-7818},
year = {2024},
date = {2024-08-11},
urldate = {2024-08-11},
journal = {ACM Transactions on Social Computing},
volume = {1},
number = {1},
issue = {1},
pages = {50},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {Crowdsourcing tasks have been widely used to collect a large number of human labels at scale. While some of these tasks are deployed by requesters and performed only once by crowd workers, others require the same worker to perform the same task or a variant of it more than once, thus participating in a so-called longitudinal study. Despite the prevalence of longitudinal studies in crowdsourcing, there is a limited understanding of factors that influence worker participation in them across different crowdsourcing marketplaces. We present results from a large-scale survey of 300 workers on 3 different micro-task crowdsourcing platforms: Amazon Mechanical Turk, Prolific and Toloka. The aim is to understand how longitudinal studies are performed using crowdsourcing. We collect answers about 547 experiences and we analyze them both quantitatively and qualitatively. We synthesize 17 take-home messages about longitudinal studies together with 8 recommendations for task requesters and 5 best practices for crowdsourcing platforms to adequately conduct and support such kinds of studies. We release the survey and the data at: https://osf.io/h4du9/.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Zeng, Xia; Barbera, David La; Roitero, Kevin; Zubiaga, Arkaitz; Mizzaro, Stefano
Combining Large Language Models and Crowdsourcing for Hybrid Human-AI Misinformation Detection Proceedings Article
In: Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2332–2336, Association for Computing Machinery, Washington DC, USA, 2024, ISBN: 9798400704314.
@inproceedings{10.1145/3626772.3657965,
title = {Combining Large Language Models and Crowdsourcing for Hybrid Human-AI Misinformation Detection},
author = {Xia Zeng and David La Barbera and Kevin Roitero and Arkaitz Zubiaga and Stefano Mizzaro},
url = {https://doi.org/10.1145/3626772.3657965},
doi = {10.1145/3626772.3657965},
isbn = {9798400704314},
year = {2024},
date = {2024-07-11},
urldate = {2024-07-11},
booktitle = {Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval},
pages = {2332–2336},
publisher = {Association for Computing Machinery},
address = {Washington DC, USA},
series = {SIGIR '24},
abstract = {Research on misinformation detection has primarily focused either on furthering Artificial Intelligence (AI) for automated detection or on studying humans' ability to deliver an effective crowdsourced solution. Each of these directions however shows different benefits. This motivates our work to study hybrid human-AI approaches jointly leveraging the potential of large language models and crowdsourcing, which is understudied to date. We propose novel combination strategies Model First, Worker First, and Meta Vote, which we evaluate along with baseline methods such as mean, median, hard- and soft-voting. Using 120 statements from the PolitiFact dataset, and a combination of state-of-the-art AI models and crowdsourced assessments, we evaluate the effectiveness of these combination strategies. Results suggest that the effectiveness varies with scales granularity, and that combining AI and human judgments enhances truthfulness assessments' effectiveness and robustness.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
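Two of the baseline combination strategies named in the abstract, hard- and soft-voting, can be sketched in a few lines; the judgments below are hypothetical, and the paper's Model First, Worker First, and Meta Vote strategies are more elaborate than this.

from collections import Counter

def hard_vote(labels):
    """Majority vote over discrete truthfulness labels."""
    return Counter(labels).most_common(1)[0][0]

def soft_vote(scores):
    """Mean of graded truthfulness scores (e.g., on a 0-5 scale)."""
    return sum(scores) / len(scores)

crowd = [4, 5, 3]  # hypothetical worker judgments for one statement
model = [4]        # hypothetical LLM judgment on the same scale
print(hard_vote(crowd + model), soft_vote(crowd + model))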
Barbera, David La; Maddalena, Eddy; Soprano, Michael; Roitero, Kevin; Demartini, Gianluca; Ceolin, Davide; Spina, Damiano; Mizzaro, Stefano
Crowdsourced Fact-checking: Does It Actually Work? Journal Article
In: Information Processing & Management, vol. 61, no 5, pp. 103792, 2024, ISSN: 0306-4573.
@article{BARBERA2024103792b,
title = {Crowdsourced Fact-checking: Does It Actually Work?},
author = {David La Barbera and Eddy Maddalena and Michael Soprano and Kevin Roitero and Gianluca Demartini and Davide Ceolin and Damiano Spina and Stefano Mizzaro},
url = {https://www.sciencedirect.com/science/article/pii/S0306457324001523},
doi = {10.1016/j.ipm.2024.103792},
issn = {0306-4573},
year = {2024},
date = {2024-05-31},
urldate = {2024-05-31},
journal = {Information Processing & Management},
volume = {61},
number = {5},
pages = {103792},
abstract = {There is an important ongoing effort aimed to tackle misinformation and to perform reliable fact-checking by employing human assessors at scale, with a crowdsourcing-based approach. Previous studies on the feasibility of employing crowdsourcing for the task of misinformation detection have provided inconsistent results: some of them seem to confirm the effectiveness of crowdsourcing for assessing the truthfulness of statements and claims, whereas others fail to reach an effectiveness level higher than automatic machine learning approaches, which are still unsatisfactory. In this paper, we aim at addressing such inconsistency and understand if truthfulness assessment can indeed be crowdsourced effectively. To do so, we build on top of previous studies; we select some of those reporting low effectiveness levels, we highlight their potential limitations, and we then reproduce their work attempting to improve their setup to address those limitations. We employ various approaches, data quality levels, and agreement measures to assess the reliability of crowd workers when assessing the truthfulness of (mis)information. Furthermore, we explore different worker features and compare the results obtained with different crowds. According to our findings, crowdsourcing can be used as an effective methodology to tackle misinformation at scale. When compared to previous studies, our results indicate that a significantly higher agreement between crowd workers and experts can be obtained by using a different, higher-quality, crowdsourcing platform and by improving the design of the crowdsourcing task. Also, we find differences concerning task and worker features and how workers provide truthfulness assessments.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Baroni, Giulia Lucrezia; Rasotto, Laura; Roitero, Kevin; Tulisso, Angelica; Loreto, Carla Di; Mea, Vincenzo Della
Optimizing Vision Transformers for Histopathology: Pretraining and Normalization in Breast Cancer Classification Journal Article
In: Journal of Imaging, vol. 10, no 5, 2024, ISSN: 2313-433X.
@article{jimaging10050108,
title = {Optimizing Vision Transformers for Histopathology: Pretraining and Normalization in Breast Cancer Classification},
author = {Giulia Lucrezia Baroni and Laura Rasotto and Kevin Roitero and Angelica Tulisso and Carla Di Loreto and Vincenzo Della Mea},
editor = {MDPI},
url = {https://www.mdpi.com/2313-433X/10/5/108},
doi = {10.3390/jimaging10050108},
issn = {2313-433X},
year = {2024},
date = {2024-05-28},
urldate = {2024-04-30},
journal = {Journal of Imaging},
volume = {10},
number = {5},
abstract = {This paper introduces a self-attention Vision Transformer model specifically developed for classifying breast cancer in histology images. We examine various training strategies and configurations, including pretraining, dimension resizing, data augmentation and color normalization strategies, patch overlap, and patch size configurations, in order to evaluate their impact on the effectiveness of the histology image classification. Additionally, we provide evidence for the increase in effectiveness gathered through geometric and color data augmentation techniques. We primarily utilize the BACH dataset to train and validate our methods and models, but we also test them on two additional datasets, BRACS and AIDPATH, to verify their generalization capabilities. Our model, developed from a transformer pretrained on ImageNet, achieves an accuracy rate of 0.91 on the BACH dataset, 0.74 on the BRACS dataset, and 0.92 on the AIDPATH dataset. Using a model based on the prostate small and prostate medium HistoEncoder models, we achieve accuracy rates of 0.89 and 0.86, respectively. Our results suggest that pretraining on large-scale general datasets like ImageNet is advantageous. We also show the potential benefits of using domain-specific pretraining datasets, such as extensive histopathological image collections as in HistoEncoder, though not yet with clear advantages.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Fiorin, Alessio; Pablo, Carlos López; Lejeune, Marylène; Siraj, Ameer Hamza; Mea, Vincenzo Della
Enhancing AI Research for Breast Cancer: A Comprehensive Review of Tumor-Infiltrating Lymphocyte Datasets Journal Article
In: Journal of Imaging Informatics in Medicine, 2024, ISSN: 2948-2933.
@article{Fiorin2024,
title = {Enhancing AI Research for Breast Cancer: A Comprehensive Review of Tumor-Infiltrating Lymphocyte Datasets},
author = {Alessio Fiorin and Carlos López Pablo and Marylène Lejeune and Ameer Hamza Siraj and Vincenzo Della Mea},
url = {https://doi.org/10.1007/s10278-024-01043-8},
doi = {10.1007/s10278-024-01043-8},
issn = {2948-2933},
year = {2024},
date = {2024-05-01},
journal = {Journal of Imaging Informatics in Medicine},
abstract = {The field of immunology is fundamental to our understanding of the intricate dynamics of the tumor microenvironment. In particular, tumor-infiltrating lymphocyte (TIL) assessment emerges as essential aspect in breast cancer cases. To gain comprehensive insights, the quantification of TILs through computer-assisted pathology (CAP) tools has become a prominent approach, employing advanced artificial intelligence models based on deep learning techniques. The successful recognition of TILs requires the models to be trained, a process that demands access to annotated datasets. Unfortunately, this task is hampered not only by the scarcity of such datasets, but also by the time-consuming nature of the annotation phase required to create them. Our review endeavors to examine publicly accessible datasets pertaining to the TIL domain and thereby become a valuable resource for the TIL community. The overall aim of the present review is thus to make it easier to train and validate current and upcoming CAP tools for TIL assessment by inspecting and evaluating existing publicly available online datasets.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Soprano, Michael; Roitero, Kevin; Barbera, David La; Ceolin, Davide; Spina, Damiano; Demartini, Gianluca; Mizzaro, Stefano
Cognitive Biases in Fact-Checking and Their Countermeasures: A Review Journal Article
In: Information Processing & Management, vol. 61, no 3, pp. 103672, 2024, ISSN: 0306-4573.
@article{SOPRANO2024103672,
title = {Cognitive Biases in Fact-Checking and Their Countermeasures: A Review},
author = {Michael Soprano and Kevin Roitero and David La Barbera and Davide Ceolin and Damiano Spina and Gianluca Demartini and Stefano Mizzaro},
url = {https://www.sciencedirect.com/science/article/pii/S0306457324000323},
doi = {10.1016/j.ipm.2024.103672},
issn = {0306-4573},
year = {2024},
date = {2024-02-11},
urldate = {2024-01-01},
journal = {Information Processing & Management},
volume = {61},
number = {3},
pages = {103672},
abstract = {The increasing amount of misinformation spread online every day is a huge threat to society. Organizations and researchers are working to counter this misinformation plague. In this setting, human assessors are indispensable to correctly identify, assess and/or revise the truthfulness of information items, i.e., to perform the fact-checking activity. Assessors, as humans, are subject to systematic errors that might interfere with their fact-checking activity. Among such errors, cognitive biases are those due to the limits of human cognition. Although biases help to minimize the cost of making mistakes, they skew assessments away from an objective perception of information. Cognitive biases, hence, are particularly frequent and critical, and can cause errors that have a huge potential impact as they propagate not only in the community, but also in the datasets used to train automatic and semi-automatic machine learning models to fight misinformation. In this work, we present a review of the cognitive biases which might occur during the fact-checking process. In more detail, inspired by PRISMA – a methodology used for systematic literature reviews – we manually derive a list of 221 cognitive biases that may affect human assessors. Then, we select the 39 biases that might manifest during the fact-checking process, we group them into categories, and we provide a description. Finally, we present a list of 11 countermeasures that can be adopted by researchers, practitioners, and organizations to limit the effect of the identified cognitive biases on the fact-checking activity.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Baroni, Giulia L.; Rasotto, Laura; Roitero, Kevin; Siraj, Ameer Hamza; Mea, V. Della
Vision Transformers for Breast Cancer Histology Image Classification Proceedings Article
In: Foresti, Gian Luca; Fusiello, Andrea; Hancock, Edwin (Ed.): Image Analysis and Processing - ICIAP 2023 Workshops, pp. 15–26, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-51026-7.
@inproceedings{10.1007/978-3-031-51026-7_2,
title = {Vision Transformers for Breast Cancer Histology Image Classification},
author = {Giulia L. Baroni and Laura Rasotto and Kevin Roitero and Ameer Hamza Siraj and V. Della Mea},
editor = {Gian Luca Foresti and Andrea Fusiello and Edwin Hancock},
doi = {10.1007/978-3-031-51026-7_2},
isbn = {978-3-031-51026-7},
year = {2024},
date = {2024-01-21},
urldate = {2024-01-01},
booktitle = {Image Analysis and Processing - ICIAP 2023 Workshops},
pages = {15–26},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {We propose a self-attention Vision Transformer (ViT) model tailored for breast cancer histology image classification. The proposed architecture uses a stack of transformer layers, with each layer consisting of a multi-head self-attention mechanism and a position-wise feed-forward network, and it is trained with different strategies and configurations, including pretraining, resize dimension, data augmentation, patch overlap, and patch size, to investigate their impact on performance on the histology image classification task. Experimental results show that pretraining on ImageNet and using geometric and color data augmentation techniques significantly improve the model's accuracy on the task. Additionally, a patch size of 16 × 16 and no patch overlap were found to be optimal for this task. These findings provide valuable insights for the design of future ViT-based models for similar image classification tasks.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
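The configuration the abstract converges on (ImageNet pretraining, 16 × 16 patches, no patch overlap) can be sketched with timm, though the library and exact model name are our assumptions rather than the paper's stated implementation.

import timm
import torch

# ImageNet-pretrained ViT with 16x16 patches, fine-tuned for 4-class
# histology classification (the BACH dataset has four classes).
model = timm.create_model("vit_base_patch16_224", pretrained=True, num_classes=4)
x = torch.randn(1, 3, 224, 224)  # one resized histology image
print(model(x).shape)            # torch.Size([1, 4])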
Ros, Francesca Da; Gaspero, Luca Di; Roitero, Kevin; Barbera, David La; Mizzaro, Stefano; Mea, Vincenzo Della; Valent, Francesca; Deroma, Laura
Supporting Fair and Efficient Emergency Medical Services in a Large Heterogeneous Region Journal Article
In: Journal of Healthcare Informatics Research, 2024, ISSN: 2509-498X.
@article{DaRos2024,
title = {Supporting Fair and Efficient Emergency Medical Services in a Large Heterogeneous Region},
author = {Francesca Da Ros and Luca Di Gaspero and Kevin Roitero and David La Barbera and Stefano Mizzaro and Vincenzo Della Mea and Francesca Valent and Laura Deroma},
url = {https://doi.org/10.1007/s41666-023-00154-1},
doi = {10.1007/s41666-023-00154-1},
issn = {2509-498X},
year = {2024},
date = {2024-01-09},
urldate = {2024-01-09},
journal = {Journal of Healthcare Informatics Research},
abstract = {Emergency Medical Services (EMS) are crucial in delivering timely and effective medical care to patients in need. However, the complex and dynamic nature of operations poses challenges for decision-making processes at strategic, tactical, and operational levels. This paper proposes an action-driven strategy for EMS management, employing a multi-objective optimizer and a simulator to evaluate potential outcomes of decisions. The approach combines historical data with dynamic simulations and multi-objective optimization techniques to inform decision-makers and improve the overall performance of the system. The research focuses on the Friuli Venezia Giulia region in north-eastern Italy. The region encompasses various landscapes and demographic situations that challenge fairness and equity in service access. Similar challenges are faced in other regions with comparable characteristics. The Decision Support System developed in this work accurately models the real-world system and provides valuable feedback and suggestions to EMS professionals, enabling them to make informed decisions and enhance the efficiency and fairness of the system.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2023
Demartini, Gianluca; Roitero, Kevin; Mizzaro, Stefano
Data Bias Management Journal Article
In: Commun. ACM, vol. 67, no 1, pp. 28–32, 2023, ISSN: 0001-0782.
@article{10.1145/3611641,
title = {Data Bias Management},
author = {Gianluca Demartini and Kevin Roitero and Stefano Mizzaro},
url = {https://doi.org/10.1145/3611641},
doi = {10.1145/3611641},
issn = {0001-0782},
year = {2023},
date = {2023-12-21},
urldate = {2023-12-01},
journal = {Commun. ACM},
volume = {67},
number = {1},
pages = {28–32},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {Envisioning a unique approach toward bias and fairness research.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Soprano, Michael; Roitero, Kevin; Mea, Vincenzo Della; Mizzaro, Stefano
Towards a Conversational-Based Agent for Health Services Proceedings Article
In: Falchi, Fabrizio; Giannotti, Fosca; Monreale, Anna; Boldrini, Chiara; Rinzivillo, Salvatore; Colantonio, Sara (Ed.): Proceedings of the Italia Intelligenza Artificiale - Thematic Workshops co-located with the 3rd CINI National Lab AIIS Conference on Artificial Intelligence, pp. 278–283, CEUR-WS.org, Pisa, Italy, 2023.
@inproceedings{DBLP:conf/italia2023/Soprano23,
title = {Towards a Conversational-Based Agent for Health Services},
author = {Michael Soprano and Kevin Roitero and Vincenzo Della Mea and Stefano Mizzaro},
editor = {Fabrizio Falchi and Fosca Giannotti and Anna Monreale and Chiara Boldrini and Salvatore Rinzivillo and Sara Colantonio},
url = {https://ceur-ws.org/Vol-3486/96.pdf},
year = {2023},
date = {2023-09-20},
urldate = {2023-01-01},
booktitle = {Proceedings of the Italia Intelligenza Artificiale - Thematic Workshops co-located with the 3rd CINI National Lab AIIS Conference on Artificial Intelligence},
volume = {3486},
pages = {278–283},
publisher = {CEUR-WS.org},
address = {Pisa, Italy},
series = {CEUR Workshop Proceedings},
abstract = {Conversational agents provide new modalities to access and interact with services and applications. Recently, they have seen a resurgence in popularity due to recent advancements in language models. Such agents have been adopted in various fields such as healthcare and education, yet they have received little attention in public administration. As a practical use case, we describe a service of the portal that provides citizens of the Italian region of Friuli-Venezia Giulia with services related to their own Electronic Health Records. The service considered allows them to search for the available doctors and pediatricians in the region's municipalities. We rely on this use case to propose a model for a conversational agent-based access modality. The proposed model lays the foundation for more advanced chatbot-like implementations that will also use alternative input modalities, such as voice-based communication.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Barbera, David La; Soprano, Michael; Roitero, Kevin; Maddalena, Eddy; Mizzaro, Stefano
Fact-Checking at Scale with Crowdsourcing: Experiments and Lessons Learned Proceedings Article
In: Nardini, Franco Maria; Tonelotto, Nicola; Faggioli, Guglielmo; Ferrara, Antonio (Ed.): Proceedings of the 13th Italian Information Retrieval Workshop, pp. 85–90, CEUR-WS.org, Pisa, Italy, 2023.
@inproceedings{DBLP:conf/iir/BarberaSRMM23,
title = {Fact-Checking at Scale with Crowdsourcing: Experiments and Lessons Learned},
author = {David La Barbera and Michael Soprano and Kevin Roitero and Eddy Maddalena and Stefano Mizzaro},
editor = {Franco Maria Nardini and Nicola Tonelotto and Guglielmo Faggioli and Antonio Ferrara},
url = {https://ceur-ws.org/Vol-3448/paper-18.pdf},
year = {2023},
date = {2023-08-26},
urldate = {2023-08-15},
booktitle = {Proceedings of the 13th Italian Information Retrieval Workshop},
volume = {3448},
pages = {85–90},
publisher = {CEUR-WS.org},
address = {Pisa, Italy},
series = {CEUR Workshop Proceedings},
abstract = {In this paper, we present our journey in exploring the use of crowdsourcing for fact-checking. We discuss our early experiments aimed towards the identification of the best possible setting for misinformation assessment using crowdsourcing. Our results indicate that the crowd can effectively address misinformation at scale, showing some degree of correlation with experts. We also highlight the influence of worker background on the quality of truthfulness assessments.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Barbera, David La; Soprano, Michael; Demartini, Gianluca; Mizzaro, Stefano; Sakai, Tetsuya
How Many Crowd Workers Do I Need? On Statistical Power When Crowdsourcing Relevance Judgments Journal Article
In: ACM Transactions on Information Systems, 2023, ISSN: 1046-8188, (Journal Ranks: Journal Citation Reports (JCR) Q1 (2021), Scimago (SJR) Q1 (2021)).
@article{10.1145/3597201,
title = {How Many Crowd Workers Do I Need? On Statistical Power When Crowdsourcing Relevance Judgments},
author = {Kevin Roitero and David La Barbera and Michael Soprano and Gianluca Demartini and Stefano Mizzaro and Tetsuya Sakai},
doi = {10.1145/3597201},
issn = {1046-8188},
year = {2023},
date = {2023-08-18},
urldate = {2023-01-01},
journal = {ACM Transactions on Information Systems},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {To scale the size of Information Retrieval collections, crowdsourcing has become a common way to collect relevance judgments at scale. Crowdsourcing experiments usually employ 100-10,000 workers, but such a number is often decided in a heuristic way. The downside is that the resulting dataset does not have any guarantee of meeting predefined statistical requirements, such as having enough statistical power to distinguish, in a statistically significant way, between the relevance of two documents. We propose a methodology adapted from literature on sound topic set size design, based on t-test and ANOVA, which aims at guaranteeing that the resulting dataset meets a predefined set of statistical requirements. We validate our approach on several public datasets. Our results show that we can reliably estimate the recommended number of workers needed to achieve statistical power, and that such estimation is dependent on the topic, while the effect of the relevance scale is limited. Furthermore, we found that such estimation is dependent on worker features such as agreement. Finally, we describe a set of practical estimation strategies that can be used to estimate the worker set size, and we also provide results on the estimation of document set sizes.},
note = {Journal Ranks: Journal Citation Reports (JCR) Q1 (2021), Scimago (SJR) Q1 (2021)},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
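The underlying machinery, power analysis for a two-sample t-test, can be sketched in a few lines with statsmodels; the effect size and thresholds below are illustrative, and the paper adapts this machinery from topic set size design to worker set sizes.

from statsmodels.stats.power import TTestIndPower

# How many workers per group are needed to detect a medium effect
# (Cohen's d = 0.5) at alpha = 0.05 with 80% power?
n = TTestIndPower().solve_power(effect_size=0.5, alpha=0.05, power=0.8)
print(round(n))  # required sample size per group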
Xie, Haoyu; Maddalena, Eddy; Qarout, Rehab; Checco, Alessandro
The Dark Side of Recruitment in Crowdsourcing: Ethics and Transparency in Micro-Task Marketplaces Journal Article
In: Computer Supported Cooperative Work (CSCW), vol. 32, no 3, pp. 439-474, 2023, ISSN: 1573-7551.
@article{Xie2023b,
title = {The Dark Side of Recruitment in Crowdsourcing: Ethics and Transparency in Micro-Task Marketplaces},
author = {Haoyu Xie and Eddy Maddalena and Rehab Qarout and Alessandro Checco},
url = {https://doi.org/10.1007/s10606-023-09464-9},
doi = {10.1007/s10606-023-09464-9},
issn = {1573-7551},
year = {2023},
date = {2023-07-28},
urldate = {2023-09-01},
journal = {Computer Supported Cooperative Work (CSCW)},
volume = {32},
number = {3},
pages = {439-474},
abstract = {Micro-task crowdsourcing marketplaces like Figure Eight (F8) connect a large pool of workers to employers through a single online platform, by aggregating multiple crowdsourcing platforms (channels) under a unique system. This paper investigates the F8 channels' demographic distribution and reward schemes by analysing more than 53k crowdsourcing tasks over four years, collecting survey data and scraping marketplace metadata. We reveal a heterogeneous per-channel demographic distribution and an opaque channel commission scheme that varies over time and is not communicated to the employer when launching a task: workers will often receive a smaller payment than the employer expects. In addition, the impact of channel commission schemes on the relationship between requesters and crowdworkers is explored. These observations uncover important issues concerning the ethics, reliability, and transparency of crowdsourced experiments when using this kind of marketplace, especially for academic research.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Maddalena, Eddy; Ibáñez, Luis-Daniel; Reeves, Neal; Simperl, Elena
Qrowdsmith: Enhancing Paid Microtask Crowdsourcing with Gamification and Furtherance Incentives Journal Article
In: ACM Trans. Intell. Syst. Technol., 2023, ISSN: 2157-6904, (Just Accepted).
@article{10.1145/3604940,
title = {Qrowdsmith: Enhancing Paid Microtask Crowdsourcing with Gamification and Furtherance Incentives},
author = {Eddy Maddalena and Luis-Daniel Ibáñez and Neal Reeves and Elena Simperl},
url = {https://doi.org/10.1145/3604940},
doi = {10.1145/3604940},
issn = {2157-6904},
year = {2023},
date = {2023-06-01},
journal = {ACM Trans. Intell. Syst. Technol.},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {Microtask crowdsourcing platforms are social intelligence systems in which volunteers, called crowdworkers, complete small, repetitive tasks in return for a small fee. Beyond payments, task requesters are considering non-monetary incentives such as points, badges and other gamified elements to increase performance and improve crowdworker experience. In this paper, we present Qrowdsmith, a platform for gamifying microtask crowdsourcing. To design the system, we explore empirically a range of gamified and financial incentives and analyse their impact on how efficient, effective, and reliable the results are. To maintain participation over time and save costs, we propose furtherance incentives, which are offered to crowdworkers to encourage additional contributions in addition to the fee agreed upfront. In a series of controlled experiments we find that while gamification can work as a furtherance incentive, it negatively impacts crowdworkers' performance, in terms of both the quantity and quality of work, compared to a baseline where they can continue to contribute voluntarily. Gamified incentives are also less effective than paid bonus equivalents. Our results contribute to the understanding of how best to encourage engagement in microtask crowdsourcing activities, and design better crowd intelligence systems.},
note = {Just Accepted},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Roitero, Kevin; Martinuzzi, Andrea; Armellin, Maria Teresa; Paparella, Gabriella; Maniero, Alberto; Mea, Vincenzo Della
Automated ICF Coding of Rehabilitation Notes for Low-Resource Languages via Continual Training of Language Models Journal Article
In: Studies in Health Technology and Informatics, vol. 302, pp. 763–767, 2023, ISSN: 1879-8365.
@article{Roitero2023,
title = {Automated ICF Coding of Rehabilitation Notes for Low-Resource Languages via Continual Training of Language Models},
author = {Kevin Roitero and Andrea Martinuzzi and Maria Teresa Armellin and Gabriella Paparella and Alberto Maniero and Vincenzo Della Mea},
editor = {IOS Press},
doi = {10.3233/SHTI230262},
issn = {1879-8365},
year = {2023},
date = {2023-05-18},
urldate = {2023-05-18},
journal = {Studies in Health Technology and Informatics},
volume = {302},
pages = {763–767},
publisher = {IOS Press},
abstract = {The coding of medical documents and in particular of rehabilitation notes using the International Classification of Functioning, Disability and Health (ICF) is a difficult task showing low agreement among experts. Such difficulty is mainly caused by the specific terminology that needs to be used for the task. In this paper, we address the task developing a model based on a large language model, BERT. By leveraging continual training of such a model using ICF textual descriptions, we are able to effectively encode rehabilitation notes expressed in Italian, an under-resourced language.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
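A minimal sketch of the continual-training idea, assuming an Italian BERT checkpoint and the Hugging Face API (the model name and the snippets of ICF text are our assumptions):

from transformers import AutoModelForMaskedLM, AutoTokenizer

# Continue masked-language-model pretraining on ICF textual descriptions,
# then fine-tune the adapted checkpoint for ICF code classification.
tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-italian-cased")
model = AutoModelForMaskedLM.from_pretrained("dbmdz/bert-base-italian-cased")
icf_descriptions = ["b152 Funzioni emozionali", "d450 Camminare"]  # hypothetical snippets
batch = tokenizer(icf_descriptions, padding=True, return_tensors="pt")
# Feed `batch` to a standard MLM loop (e.g., Trainer with
# DataCollatorForLanguageModeling) before the downstream fine-tuning step.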
Ceolin, Davide; Roitero, Kevin; Guo, Furong
Predicting Crowd Workers Performance: An Information Quality Case Proceedings Article
In: Garrigós, Irene; Rodríguez, Juan Manuel Murillo; Wimmer, Manuel (Ed.): Web Engineering, pp. 75–90, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-34444-2.
@inproceedings{10.1007/978-3-031-34444-2_6,
title = {Predicting Crowd Workers Performance: An Information Quality Case},
author = {Davide Ceolin and Kevin Roitero and Furong Guo},
editor = {Irene Garrigós and Juan Manuel Murillo Rodríguez and Manuel Wimmer},
doi = {10.1007/978-3-031-34444-2_6},
isbn = {978-3-031-34444-2},
year = {2023},
date = {2023-01-01},
booktitle = {Web Engineering},
pages = {75–90},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {Supervised machine learning tasks require human-labeled data. Crowdsourcing allows scaling up the labeling process, but the quality of the labels obtained can vary. To address this limitation, we propose methods for predicting label quality based on worker trajectories, i.e., on the sequence of documents workers explore during their crowdsourcing tasks. Trajectories represent a lightweight and non-intrusive form of worker behavior signal. We base our analysis on previously collected datasets composed of thousands of assessment data records including information such as workers' trajectories, workers' assessments, and experts' assessments. We model such behavior sequences as embeddings, to facilitate their management. Then, we: (1) use supervised methods to predict worker performance using a given ground truth; (2) perform an unsupervised analysis to provide insight into crowdsourcing quality when no gold standard is available. We test several supervised approaches which all beat the baseline we propose. Also, we identify significant differences between trajectory clusters in terms of assessments and worker performance. The trajectory-based analysis is a promising direction for non-intrusive worker performance evaluation.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Portelli, Beatrice; Serra, Giuseppe; Mea, Vincenzo Della; Mizzaro, Stefano; Cerro, Gianni; Vitelli, Michele; Molinara, Mario
Detection of Wastewater Pollution Through Natural Language Generation With a Low-Cost Sensing Platform Journal Article
In: IEEE Access, vol. 11, pp. 50272–50284, 2023, ISSN: 2169-3536.
@article{10129181,
title = {Detection of Wastewater Pollution Through Natural Language Generation With a Low-Cost Sensing Platform},
author = {Kevin Roitero and Beatrice Portelli and Giuseppe Serra and Vincenzo Della Mea and Stefano Mizzaro and Gianni Cerro and Michele Vitelli and Mario Molinara},
doi = {10.1109/ACCESS.2023.3277535},
issn = {2169-3536},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
journal = {IEEE Access},
volume = {11},
pages = {50272–50284},
abstract = {The detection of contaminants in several environments (e.g., air, water, sewage systems) is of paramount importance to protect people and predict possible dangerous circumstances. Most works do this using classical Machine Learning tools that act on the acquired measurement data. This paper introduces two main elements: a low-cost platform to acquire, pre-process, and transmit data to classify contaminants in wastewater; and a novel classification approach to classify contaminants in wastewater, based on deep learning and the transformation of raw sensor data into natural language metadata. The proposed solution presents clear advantages against state-of-the-art systems in terms of higher effectiveness and reasonable efficiency. The main disadvantage of the proposed approach is that it relies on knowing the injection time, i.e., the instant in time when the contaminant is injected into the wastewater. For this reason, the developed system also includes a finite state machine tool able to infer the exact time instant when the substance is injected. The entire system is presented and discussed in detail. Furthermore, several variants of the proposed processing technique are also presented to assess the sensitivity to the number of used samples and the corresponding promptness/computational burden of the system. The lowest accuracy obtained by our technique is 91.4%, which is significantly higher than the 81.0% accuracy reached by the best baseline method.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Amigó, Enrique; Deldjoo, Yashar; Mizzaro, Stefano; Bellogín, Alejandro
A unifying and general account of fairness measurement in recommender systems Journal Article
In: Information Processing & Management, vol. 60, no. 1, pp. 103115, 2023, ISSN: 0306-4573.
@article{AMIGO2023103115,
title = {A unifying and general account of fairness measurement in recommender systems},
author = {Enrique Amigó and Yashar Deldjoo and Stefano Mizzaro and Alejandro Bellogín},
url = {https://www.sciencedirect.com/science/article/pii/S0306457322002163},
doi = {10.1016/j.ipm.2022.103115},
issn = {0306-4573},
year = {2023},
date = {2023-01-01},
journal = {Information Processing & Management},
volume = {60},
number = {1},
pages = {103115},
abstract = {Fairness is fundamental to all information access systems, including recommender systems. However, the landscape of fairness definition and measurement is quite scattered, with many competing definitions that are partial and often incompatible. There is much work focusing on specific – and different – notions of fairness and there exist dozens of metrics of fairness in the literature, many of them redundant and most of them incompatible. In contrast, to our knowledge, there is no formal framework that covers all possible variants of fairness and allows developers to choose the most appropriate variant depending on the particular scenario. In this paper, we aim to define a general, flexible, and parameterizable framework that covers a whole range of fairness evaluation possibilities. Instead of modeling the metrics based on an abstract definition of fairness, the distinctive feature of this study compared to the current state of the art is that we start from the metrics applied in the literature to obtain a unified model by generalization. The framework is grounded in a general working hypothesis: interpreting the space of users and items as a probabilistic sample space, two fundamental measures in information theory (Kullback–Leibler Divergence and Mutual Information) can capture the majority of possible scenarios for measuring fairness on recommender system outputs. In addition, earlier research on fairness in recommender systems could be viewed as single-sided, trying to optimize some form of equity across either user groups or provider/procurer groups, without considering the user/item space in conjunction, thereby disregarding the interplay between user and item groups. Instead, our framework includes the notion of statistical independence between user and item groups. We finally validate our approach experimentally on both synthetic and real data according to a wide range of state-of-the-art recommendation algorithms and real-world data sets, showing that with our framework we can measure fairness in a general, uniform, and meaningful way.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Amigó, Enrique; Gonzalo, Julio; Mizzaro, Stefano
What is My Problem? Identifying Formal Tasks and Metrics in Data Mining on the Basis of Measurement Theory Journal Article
In: IEEE Transactions on Knowledge and Data Engineering, vol. 35, no. 2, pp. 2147–2157, 2023.
@article{9528028,
title = {What is My Problem? Identifying Formal Tasks and Metrics in Data Mining on the Basis of Measurement Theory},
author = {Enrique Amigó and Julio Gonzalo and Stefano Mizzaro},
doi = {10.1109/TKDE.2021.3109823},
year = {2023},
date = {2023-01-01},
journal = {IEEE Transactions on Knowledge and Data Engineering},
volume = {35},
number = {2},
pages = {2147–2157},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2022
Brand, Erik; Roitero, Kevin; Soprano, Michael; Rahimi, Afshin; Demartini, Gianluca
A Neural Model to Jointly Predict and Explain Truthfulness of Statements Journal Article
In: J. Data and Information Quality, 2022, ISSN: 1936-1955, (Just Accepted).
@article{10.1145/3546917,
title = {A Neural Model to Jointly Predict and Explain Truthfulness of Statements},
author = {Erik Brand and Kevin Roitero and Michael Soprano and Afshin Rahimi and Gianluca Demartini},
url = {https://doi.org/10.1145/3546917},
doi = {10.1145/3546917},
issn = {1936-1955},
year = {2022},
date = {2022-05-01},
journal = {J. Data and Information Quality},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {Automated fact-checking (AFC) systems exist to combat disinformation; however, their complexity usually makes them opaque to the end user, making it difficult to foster trust in the system. In this paper, we introduce the E-BART model with the hope of making progress on this front. E-BART is able to provide a veracity prediction for a claim, and jointly generate a human-readable explanation for this decision. We show that E-BART is competitive with the state-of-the-art on the e-FEVER and e-SNLI tasks. In addition, we validate the joint-prediction architecture by showing 1) that generating explanations does not significantly impede the model from performing well in its main task of veracity prediction, and 2) that predicted veracity and explanations are more internally coherent when generated jointly than separately. We also calibrate the E-BART model, allowing the output of the final model to be correctly interpreted as the confidence of correctness. Finally, we also conduct an extensive human evaluation on the impact of generated explanations and observe that explanations increase human ability to spot misinformation and make people more skeptical about claims, and that explanations generated by E-BART are competitive with ground truth explanations.},
note = {Just Accepted},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Qu, Yunke; Barbera, David La; Roitero, Kevin; Mizzaro, Stefano; Spina, Damiano; Demartini, Gianluca
Combining Human and Machine Confidence in Truthfulness Assessment Journal Article
In: J. Data and Information Quality, 2022, ISSN: 1936-1955, (Just Accepted).
@article{10.1145/3546916,
title = {Combining Human and Machine Confidence in Truthfulness Assessment},
author = {Yunke Qu and David La Barbera and Kevin Roitero and Stefano Mizzaro and Damiano Spina and Gianluca Demartini},
url = {https://doi.org/10.1145/3546916},
doi = {10.1145/3546916},
issn = {1936-1955},
year = {2022},
date = {2022-05-01},
journal = {J. Data and Information Quality},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {Automatically detecting online misinformation at scale is a challenging and interdisciplinary problem. Deciding what is to be considered truthful information is sometimes controversial and difficult also for educated experts. As the scale of the problem increases, human-in-the-loop approaches to truthfulness that combine both the scalability of machine learning (ML) and the accuracy of human contributions have been considered. In this work we look at the potential to automatically combine machine-based systems with human-based systems. The former exploit supervised ML approaches; the latter involve either crowd workers (i.e., human non-experts) or human experts. Since both ML and crowdsourcing approaches can produce a score indicating the level of confidence on their truthfulness judgments (either algorithmic or self-reported, respectively), we address the question of whether it is feasible to make use of such confidence scores to effectively and efficiently combine three approaches: (i) machine-based methods; (ii) crowd workers, and (iii) human experts. The three approaches differ significantly as they range from available, cheap, fast, scalable, but less accurate to scarce, expensive, slow, not scalable, but highly accurate.},
note = {Just Accepted},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Soprano, Michael; Roitero, Kevin; Bona, Francesco Bombassei De; Mizzaro, Stefano
Crowd_Frame: A Simple and Complete Framework to Deploy Complex Crowdsourcing Tasks Off-the-Shelf Proceedings Article
In: Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining, pp. 1605–1608, Association for Computing Machinery, Virtual Event, AZ, USA, 2022, ISBN: 9781450391320.
@inproceedings{conference-paper-wsdm2022,
title = {Crowd_Frame: A Simple and Complete Framework to Deploy Complex Crowdsourcing Tasks Off-the-Shelf},
author = {Michael Soprano and Kevin Roitero and Francesco Bombassei De Bona and Stefano Mizzaro},
doi = {10.1145/3488560.3502182},
isbn = {9781450391320},
year = {2022},
date = {2022-01-01},
booktitle = {Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining},
pages = {1605–1608},
publisher = {Association for Computing Machinery},
address = {Virtual Event, AZ, USA},
series = {WSDM '22},
abstract = {Due to their relatively low cost and ability to scale, crowdsourcing-based approaches are widely used to collect a large amount of human-annotated data. To this aim, multiple crowdsourcing platforms exist, where requesters can upload tasks and workers can carry them out and obtain payment in return. Such platforms share a task design and deployment workflow that is often counter-intuitive and cumbersome. To address this issue, we propose Crowd_Frame, a simple and complete framework which allows one to develop and deploy diverse types of complex crowdsourcing tasks in an easy and customizable way. We show the abilities of the proposed framework and we make it available to researchers and practitioners.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Checco, Alessandro; Mizzaro, Stefano; Demartini, Gianluca
Preferences on a Budget: Prioritizing Document Pairs When Crowdsourcing Relevance Judgments Proceedings Article
In: Proceedings of the ACM Web Conference 2022, pp. 319–327, Association for Computing Machinery, Virtual Event, Lyon, France, 2022, ISBN: 9781450390965.
@inproceedings{10.1145/3485447.3511960,
title = {Preferences on a Budget: Prioritizing Document Pairs When Crowdsourcing Relevance Judgments},
author = {Kevin Roitero and Alessandro Checco and Stefano Mizzaro and Gianluca Demartini},
url = {https://doi.org/10.1145/3485447.3511960},
doi = {10.1145/3485447.3511960},
isbn = {9781450390965},
year = {2022},
date = {2022-01-01},
booktitle = {Proceedings of the ACM Web Conference 2022},
pages = {319–327},
publisher = {Association for Computing Machinery},
address = {Virtual Event, Lyon, France},
series = {WWW '22},
abstract = {In Information Retrieval (IR) evaluation, preference judgments are collected by presenting to the assessors a pair of documents and asking them to select which of the two, if any, is the most relevant. This is an alternative to the classic relevance judgment approach, in which human assessors judge the relevance of a single document on a scale; such an alternative allows assessors to make relative rather than absolute judgments of relevance. While preference judgments are easier for human assessors to perform, the number of possible document pairs to be judged is usually so high that it is unfeasible to judge them all. Thus, following a similar idea to pooling strategies for single document relevance judgments, where the goal is to sample the most useful documents to be judged, in this work we focus on analyzing alternative ways to sample document pairs to judge, in order to maximize the value of a fixed number of preference judgments that can feasibly be collected. Such value is defined as how well we can evaluate IR systems given a budget, that is, a fixed number of human preference judgments that may be collected. By relying on several datasets featuring relevance judgments gathered by means of experts and crowdsourcing, we experimentally compare alternative strategies to select document pairs and show how different strategies lead to different IR evaluation result quality levels. Our results show that, by using the appropriate procedure, it is possible to achieve good IR evaluation results with a limited number of preference judgments, thus confirming the feasibility of using preference judgments to create IR evaluation collections.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Barbera, David La; Roitero, Kevin; Mackenzie, Joel; Spina, Damiano; Demartini, Gianluca; Mizzaro, Stefano
BUM at CheckThat! 2022: A Composite Deep Learning Approach to Fake News Detection using Evidence Retrieval Proceedings Article
In: Faggioli, Guglielmo; Ferro, Nicola; Hanbury, Allan; Potthast, Martin (Ed.): Working Notes of CLEF 2022 - Conference and Labs of the Evaluation Forum, Bologna, Italy, 2022.
@inproceedings{clef-checkthat:2022:task3:La_Barbera_BUM,
title = {BUM at CheckThat! 2022: A Composite Deep Learning Approach to Fake News Detection using Evidence Retrieval},
author = {David La Barbera and Kevin Roitero and Joel Mackenzie and Damiano Spina and Gianluca Demartini and Stefano Mizzaro},
editor = {Guglielmo Faggioli and Nicola Ferro and Allan Hanbury and Martin Potthast},
year = {2022},
date = {2022-01-01},
booktitle = {Working Notes of CLEF 2022 - Conference and Labs of the Evaluation Forum},
address = {Bologna, Italy},
series = {CLEF 2022},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Draws, Tim; Barbera, David La; Soprano, Michael; Roitero, Kevin; Ceolin, Davide; Checco, Alessandro; Mizzaro, Stefano
The Effects of Crowd Worker Biases in Fact-Checking Tasks Proceedings Article
In: 2022 ACM Conference on Fairness, Accountability, and Transparency, pp. 2114–2124, Association for Computing Machinery, Seoul, Republic of Korea, 2022, ISBN: 9781450393522.
@inproceedings{10.1145/3531146.3534629,
title = {The Effects of Crowd Worker Biases in Fact-Checking Tasks},
author = {Tim Draws and David La Barbera and Michael Soprano and Kevin Roitero and Davide Ceolin and Alessandro Checco and Stefano Mizzaro},
url = {https://doi.org/10.1145/3531146.3534629},
doi = {10.1145/3531146.3534629},
isbn = {9781450393522},
year = {2022},
date = {2022-01-01},
booktitle = {2022 ACM Conference on Fairness, Accountability, and Transparency},
pages = {2114–2124},
publisher = {Association for Computing Machinery},
address = {Seoul, Republic of Korea},
series = {FAccT '22},
abstract = {Due to the increasing amount of information shared online every day, the need for sound and reliable ways of distinguishing between trustworthy and non-trustworthy information is as present as ever. One technique for performing fact-checking at scale is to employ human intelligence in the form of crowd workers. Although earlier work has suggested that crowd workers can reliably identify misinformation, cognitive biases of crowd workers may reduce the quality of truthfulness judgments in this context. We performed a systematic exploratory analysis of publicly available crowdsourced data to identify a set of potential systematic biases that may occur when crowd workers perform fact-checking tasks. Following this exploratory study, we collected a novel data set of crowdsourced truthfulness judgments to validate our hypotheses. Our findings suggest that workers generally overestimate the truthfulness of statements and that different individual characteristics (i.e., their belief in science) and cognitive biases (i.e., the affect heuristic and overconfidence) can affect their annotations. Interestingly, we find that, depending on the general judgment tendencies of workers, their biases may sometimes lead to more accurate judgments.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Ceschia, Sara; Roitero, Kevin; Demartini, Gianluca; Mizzaro, Stefano; Gaspero, Luca Di; Schaerf, Andrea
Task design in complex crowdsourcing experiments: Item assignment optimization Journal Article
In: Computers & Operations Research, pp. 105995, 2022, ISSN: 0305-0548.
@article{CESCHIA2022105995,
title = {Task design in complex crowdsourcing experiments: Item assignment optimization},
author = {Sara Ceschia and Kevin Roitero and Gianluca Demartini and Stefano Mizzaro and Luca Di Gaspero and Andrea Schaerf},
url = {https://www.sciencedirect.com/science/article/pii/S0305054822002295},
doi = {10.1016/j.cor.2022.105995},
issn = {0305-0548},
year = {2022},
date = {2022-01-01},
journal = {Computers & Operations Research},
pages = {105995},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ceolin, Davide; Primiero, Giuseppe; Soprano, Michael; Wielemaker, Jan
Transparent Assessment of Information Quality of Online Reviews Using Formal Argumentation Theory Journal Article
In: Information Systems, vol. 110, pp. 102107, 2022, ISSN: 0306-4379, (Journal Ranks: Journal Citation Reports (JCR) Q2 (2021), Scimago (SJR) Q1 (2021)).
@article{CEOLIN2022102107,
title = {Transparent Assessment of Information Quality of Online Reviews Using Formal Argumentation Theory},
author = {Davide Ceolin and Giuseppe Primiero and Michael Soprano and Jan Wielemaker},
doi = {10.1016/j.is.2022.102107},
issn = {0306-4379},
year = {2022},
date = {2022-01-01},
journal = {Information Systems},
volume = {110},
pages = {102107},
abstract = {Review scores collect users’ opinions in a simple and intuitive manner. However, review scores are also easily manipulable, hence they are often accompanied by explanations. A substantial amount of research has been devoted to ascertaining the quality of reviews, to identify the most useful and authentic scores through explanation analysis. In this paper, we advance the state of the art in review quality analysis. We introduce a rating system to identify review arguments and to define an appropriate weighted semantics through formal argumentation theory. We introduce an algorithm to construct a corresponding graph, based on a selection of weighted arguments, their semantic distance, and the supported ratings. We also provide an algorithm to identify the model of such an argumentation graph, maximizing the overall weight of the admitted nodes and edges. We evaluate these contributions on the Amazon review dataset by McAuley et al. (2015), by comparing the results of our argumentation assessment with the upvotes received by the reviews. Also, we deepen the evaluation by crowdsourcing a multidimensional assessment of reviews and comparing it to the argumentation assessment. Lastly, we perform a user study to evaluate the explainability of our method, i.e., to test whether the automated method we use to assess reviews is understandable by humans. Our method achieves two goals: (1) it identifies reviews that are considered useful, comprehensible, and complete by online users, and does so in an unsupervised manner, and (2) it provides an explanation of quality assessments.},
note = {Journal Ranks: Journal Citation Reports (JCR) Q2 (2021), Scimago (SJR) Q1 (2021)},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Amigó, Enrique; Mizzaro, Stefano; Spina, Damiano
Ranking Interruptus: When Truncated Rankings Are Better and How to Measure That Proceedings Article
In: Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 588–598, Association for Computing Machinery, New York, NY, USA, 2022, ISBN: 9781450387323.
@inproceedings{10.1145/3477495.3532051,
title = {Ranking Interruptus: When Truncated Rankings Are Better and How to Measure That},
author = {Enrique Amigó and Stefano Mizzaro and Damiano Spina},
url = {https://doi.org/10.1145/3477495.3532051},
doi = {10.1145/3477495.3532051},
isbn = {9781450387323},
year = {2022},
date = {2022-01-01},
booktitle = {Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval},
pages = {588–598},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
series = {SIGIR '22},
abstract = {Most information retrieval effectiveness evaluation metrics assume that systems appending irrelevant documents at the bottom of the ranking are as effective as (or not worse than) systems that have a stopping criterion to 'truncate' the ranking at the right position to avoid retrieving those irrelevant documents at the end. It can be argued, however, that such truncated rankings are more useful to the end user. It is thus important to understand how to measure retrieval effectiveness in this scenario. In this paper we provide both theoretical and experimental contributions. We first define formal properties to analyze how effectiveness metrics behave when evaluating truncated rankings. Our theoretical analysis shows that de-facto standard metrics do not satisfy desirable properties to evaluate truncated rankings: only Observational Information Effectiveness (OIE) – a metric based on Shannon's information theory – satisfies them all. We then perform experiments to compare several metrics on nine TREC datasets. According to our experimental results, the most appropriate metrics for truncated rankings are OIE and a novel extension of Rank-Biased Precision that adds a user effort factor penalizing the retrieval of irrelevant documents.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2021
Brand, Erik; Roitero, Kevin; Soprano, Michael; Demartini, Gianluca
E-BART: Jointly Predicting and Explaining Truthfulness Proceedings Article
In: Augenstein, Isabelle; Papotti, Paolo; Wright, Dustin (Ed.): Proceedings of the 2021 Truth and Trust Online Conference (TTO 2021), Virtual, October 7-8, 2021, pp. 18–27, Hacks Hackers, 2021.
@inproceedings{conference-paper-tto-2021,
title = {E-BART: Jointly Predicting and Explaining Truthfulness},
author = {Erik Brand and Kevin Roitero and Michael Soprano and Gianluca Demartini},
editor = {Isabelle Augenstein and Paolo Papotti and Dustin Wright},
url = {https://truthandtrustonline.com/wp-content/uploads/2021/10/TTO2021_paper_16-1.pdf},
year = {2021},
date = {2021-01-01},
booktitle = {Proceedings of the 2021 Truth and Trust Online Conference (TTO 2021), Virtual, October 7-8, 2021},
pages = {18--27},
publisher = {Hacks Hackers},
abstract = {Automated fact-checking (AFC) systems exist to combat disinformation; however, their complexity makes them opaque to the end user, making it difficult to foster trust. In this paper, we introduce the E-BART model with the hope of making progress on this front. E-BART is able to provide a veracity prediction for a claim, and jointly generate a human-readable explanation for this decision. We show that E-BART is competitive with the state-of-the-art on the e-FEVER and e-SNLI tasks. In addition, we validate the joint-prediction architecture by showing 1) that generating explanations does not significantly impede the model from performing well in its main task of veracity prediction, and 2) that predicted veracity and explanations are more internally coherent when generated jointly than separately. Finally, we also conduct human evaluations on the impact of generated explanations and observe that explanations increase human ability to spot misinformation and make people more skeptical about claims.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Soprano, Michael; Portelli, Beatrice; Luise, Massimiliano De; Spina, Damiano; Mea, Vincenzo Della; Serra, Giuseppe; Mizzaro, Stefano; Demartini, Gianluca
Can The Crowd Judge Truthfulness? A Longitudinal Study on Recent Misinformation About COVID-19 Journal Article
In: Personal and Ubiquitous Computing, 2021, ISSN: 1617-4917.
@article{journal-paper-puc-2021,
title = {Can The Crowd Judge Truthfulness? A Longitudinal Study on Recent Misinformation About COVID-19},
author = {Kevin Roitero and Michael Soprano and Beatrice Portelli and Massimiliano De Luise and Damiano Spina and Vincenzo Della Mea and Giuseppe Serra and Stefano Mizzaro and Gianluca Demartini},
url = {https://doi.org/10.1007/s00779-021-01604-6},
doi = {10.1007/s00779-021-01604-6},
issn = {1617-4917},
year = {2021},
date = {2021-01-01},
journal = {Personal and Ubiquitous Computing},
abstract = {Recently, the misinformation problem has been addressed with a crowdsourcing-based approach: to assess the truthfulness of a statement, instead of relying on a few experts, a crowd of non-experts is exploited. We study whether crowdsourcing is an effective and reliable method to assess truthfulness during a pandemic, targeting statements related to COVID-19, thus addressing (mis)information that is both related to a sensitive and personal issue and very recent as compared to when the judgment is done. In our experiments, crowd workers are asked to assess the truthfulness of statements, and to provide evidence for the assessments. Besides showing that the crowd is able to accurately judge the truthfulness of the statements, we report results on workers' behavior, agreement among workers, and the effect of aggregation functions, of scale transformations, and of workers' background and bias. We perform a longitudinal study by re-launching the task multiple times with both novice and experienced workers, deriving important insights on how the behavior and quality change over time. Our results show that workers are able to detect and objectively categorize online (mis)information related to COVID-19; both crowdsourced and expert judgments can be transformed and aggregated to improve quality; worker background and other signals (e.g., source of information, behavior) impact the quality of the data. The longitudinal study demonstrates that the time-span has a major effect on the quality of the judgments, for both novice and experienced workers. Finally, we provide an extensive failure analysis of the statements misjudged by the crowd workers.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Soprano, Michael; Roitero, Kevin; Barbera, David La; Ceolin, Davide; Spina, Damiano; Mizzaro, Stefano; Demartini, Gianluca
The Many Dimensions of Truthfulness: Crowdsourcing Misinformation Assessments on a Multidimensional Scale Journal Article
In: Information Processing & Management, vol. 58, no. 6, pp. 102710, 2021, ISSN: 0306-4573.
@article{journal-paper-ipm-2021,
title = {The Many Dimensions of Truthfulness: Crowdsourcing Misinformation Assessments on a Multidimensional Scale},
author = {Michael Soprano and Kevin Roitero and David La Barbera and Davide Ceolin and Damiano Spina and Stefano Mizzaro and Gianluca Demartini},
url = {https://www.sciencedirect.com/science/article/pii/S0306457321001941},
doi = {10.1016/j.ipm.2021.102710},
issn = {0306-4573},
year = {2021},
date = {2021-01-01},
journal = {Information Processing & Management},
volume = {58},
number = {6},
pages = {102710},
abstract = {Recent work has demonstrated the viability of using crowdsourcing as a tool for evaluating the truthfulness of public statements. Under certain conditions such as: (1) having a balanced set of workers with different backgrounds and cognitive abilities; (2) using an adequate set of mechanisms to control the quality of the collected data; and (3) using a coarse grained assessment scale, the crowd can provide reliable identification of fake news. However, fake news are a subtle matter: statements can be just biased (“cherrypicked”), imprecise, wrong, etc. and the unidimensional truth scale used in existing work cannot account for such differences. In this paper we propose a multidimensional notion of truthfulness and we ask the crowd workers to assess seven different dimensions of truthfulness selected based on existing literature: Correctness, Neutrality, Comprehensibility, Precision, Completeness, Speaker’s Trustworthiness, and Informativeness. We deploy a set of quality control mechanisms to ensure that the thousands of assessments collected on 180 publicly available fact-checked statements distributed over two datasets are of adequate quality, including a custom search engine used by the crowd workers to find web pages supporting their truthfulness assessments. A comprehensive analysis of crowdsourced judgments shows that: (1) the crowdsourced assessments are reliable when compared to an expert-provided gold standard; (2) the proposed dimensions of truthfulness capture independent pieces of information; (3) the crowdsourcing task can be easily learned by the workers; and (4) the resulting assessments provide a useful basis for a more complete estimation of statement truthfulness.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ceolin, Davide; Primiero, Giuseppe; Wielemaker, Jan; Soprano, Michael
Assessing the Quality of Online Reviews Using Formal Argumentation Theory Proceedings Article
In: Brambilla, Marco; Chbeir, Richard; Frasincar, Flavius; Manolescu, Ioana (Ed.): Web Engineering, pp. 71–87, Springer International Publishing, Cham, 2021, ISBN: 978-3-030-74296-6.
@inproceedings{10.1007/978-3-030-74296-6_6,
title = {Assessing the Quality of Online Reviews Using Formal Argumentation Theory},
author = {Davide Ceolin and Giuseppe Primiero and Jan Wielemaker and Michael Soprano},
editor = {Marco Brambilla and Richard Chbeir and Flavius Frasincar and Ioana Manolescu},
doi = {10.1007/978-3-030-74296-6_6},
isbn = {978-3-030-74296-6},
year = {2021},
date = {2021-01-01},
booktitle = {Web Engineering},
pages = {71--87},
publisher = {Springer International Publishing},
address = {Cham},
abstract = {Review scores collect users' opinions in a simple and intuitive manner. However, review scores are also easily manipulable, hence they are often accompanied by explanations. A substantial amount of research has been devoted to ascertaining the quality of reviews, to identify the most useful and authentic scores through explanation analysis. In this paper, we advance the state of the art in review quality analysis. We introduce a rating system to identify review arguments and to define an appropriate weighted semantics through formal argumentation theory. We introduce an algorithm to construct a corresponding graph, based on a selection of weighted arguments, their semantic similarity, and the supported ratings. We provide an algorithm to identify the model of such an argumentation graph, maximizing the overall weight of the admitted nodes and edges. We evaluate these contributions on the Amazon review dataset by McAuley et al. [15], by comparing the results of our argumentation assessment with the upvotes received by the reviews. Also, we deepen the evaluation by crowdsourcing a multidimensional assessment of reviews and comparing it to the argumentation assessment. Lastly, we perform a user study to evaluate the explainability of our method. Our method achieves two goals: (1) it identifies reviews that are considered useful, comprehensible, and truthful by online users, and does so in an unsupervised manner, and (2) it provides an explanation of quality assessments.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Qu, Yunke; Roitero, Kevin; Mizzaro, Stefano; Spina, Damiano; Demartini, Gianluca
Human-in-the-Loop Systems for Truthfulness: A Study of Human and Machine Confidence Proceedings Article
In: Augenstein, Isabelle; Papotti, Paolo; Wright, Dustin (Ed.): Proceedings of the 2021 Truth and Trust Online Conference (TTO 2021), Virtual, October 7-8, 2021, pp. 40–49, Hacks Hackers, 2021.
@inproceedings{DBLP:conf/tto/QuRMSD21,
title = {Human-in-the-Loop Systems for Truthfulness: A Study of Human and Machine Confidence},
author = {Yunke Qu and Kevin Roitero and Stefano Mizzaro and Damiano Spina and Gianluca Demartini},
editor = {Isabelle Augenstein and Paolo Papotti and Dustin Wright},
url = {https://truthandtrustonline.com/wp-content/uploads/2021/10/TTO2021_paper_29.pdf},
year = {2021},
date = {2021-01-01},
booktitle = {Proceedings of the 2021 Truth and Trust Online Conference (TTO 2021), Virtual, October 7-8, 2021},
pages = {40--49},
publisher = {Hacks Hackers},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Portelli, Beatrice; Popescu, Mihai Horia; Mea, Vincenzo Della
DiLBERT: Cheap Embeddings for Disease Related Medical NLP Journal Article
In: IEEE Access, vol. 9, pp. 159714–159723, 2021.
@article{9628010,
title = {DiLBERT: Cheap Embeddings for Disease Related Medical NLP},
author = {Kevin Roitero and Beatrice Portelli and Mihai Horia Popescu and Vincenzo Della Mea},
doi = {10.1109/ACCESS.2021.3131386},
year = {2021},
date = {2021-01-01},
journal = {IEEE Access},
volume = {9},
pages = {159714–159723},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Demartini, Gianluca; Roitero, Kevin; Mizzaro, Stefano
Managing Bias in Human-Annotated Data: Moving Beyond Bias Removal Journal Article
In: CoRR, vol. abs/2110.13504, 2021.
@article{DBLP:journals/corr/abs-2110-13504,
title = {Managing Bias in Human-Annotated Data: Moving Beyond Bias Removal},
author = {Gianluca Demartini and Kevin Roitero and Stefano Mizzaro},
url = {https://arxiv.org/abs/2110.13504},
year = {2021},
date = {2021-01-01},
journal = {CoRR},
volume = {abs/2110.13504},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Conde-Sousa, Eduardo; Vale, João; Feng, Ming; Xu, Kele; Wang, Yin; Mea, Vincenzo Della; Barbera, David La; Montahaei, Ehsan; Baghshah, Mahdieh Soleymani; Turzynski, Andreas; Gildenblat, Jacob; Klaiman, Eldad; Hong, Yiyu; Aresta, Guilherme; Araújo, Teresa; Aguiar, Paulo; Eloy, Catarina; Polónia, António
HEROHE Challenge: assessing HER2 status in breast cancer without immunohistochemistry or in situ hybridization Miscellaneous
2021.
@misc{https://doi.org/10.48550/arxiv.2111.04738,
title = {HEROHE Challenge: assessing HER2 status in breast cancer without immunohistochemistry or in situ hybridization},
author = {Eduardo Conde-Sousa and João Vale and Ming Feng and Kele Xu and Yin Wang and Vincenzo Della Mea and David La Barbera and Ehsan Montahaei and Mahdieh Soleymani Baghshah and Andreas Turzynski and Jacob Gildenblat and Eldad Klaiman and Yiyu Hong and Guilherme Aresta and Teresa Araújo and Paulo Aguiar and Catarina Eloy and António Polónia},
url = {https://arxiv.org/abs/2111.04738},
doi = {10.48550/ARXIV.2111.04738},
year = {2021},
date = {2021-01-01},
publisher = {arXiv},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Barbera, David La; Roitero, Kevin; Mizzaro, Stefano; Mea, Vincenzo Della; Valent, Francesca
A Software Simulator for Optimizing Ambulance Location and Response Time: A Preliminary Report Proceedings Article
In: 2021 IEEE International Conference on Digital Health (ICDH), pp. 209–211, 2021.
@inproceedings{9581242,
title = {A Software Simulator for Optimizing Ambulance Location and Response Time: A Preliminary Report},
author = {David La Barbera and Kevin Roitero and Stefano Mizzaro and Vincenzo Della Mea and Francesca Valent},
doi = {10.1109/ICDH52753.2021.00037},
year = {2021},
date = {2021-01-01},
booktitle = {2021 IEEE International Conference on Digital Health (ICDH)},
pages = {209–211},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2020
Barbera, David La; Polónia, António; Roitero, Kevin; Conde-Sousa, Eduardo; Mea, Vincenzo Della
Detection of HER2 from Haematoxylin-Eosin Slides Through a Cascade of Deep Learning Classifiers via Multi-Instance Learning Journal Article
In: Journal of Imaging, vol. 6, no. 9, 2020, ISSN: 2313-433X.
@article{labarberaher2,
title = {Detection of HER2 from Haematoxylin-Eosin Slides Through a Cascade of Deep Learning Classifiers via Multi-Instance Learning},
author = {David La Barbera and António Polónia and Kevin Roitero and Eduardo Conde-Sousa and Vincenzo Della Mea},
doi = {10.3390/jimaging6090082},
issn = {2313-433X},
year = {2020},
date = {2020-08-23},
urldate = {2020-08-23},
journal = {Journal of Imaging},
volume = {6},
number = {9},
abstract = {Breast cancer is the most frequently diagnosed cancer in women. The correct identification of the HER2 receptor is a matter of major importance when dealing with breast cancer: an over-expression of HER2 is associated with aggressive clinical behaviour; moreover, HER2-targeted therapy results in a significant improvement in the overall survival rate. In this work, we employ a pipeline based on a cascade of deep neural network classifiers and multi-instance learning to detect the presence of HER2 from Haematoxylin-Eosin slides, which partly mimics the pathologist’s behaviour by first recognizing cancer and then evaluating HER2. Our results show that the proposed system achieves good overall effectiveness. Furthermore, the system design is open to further improvements that can be easily deployed in order to increase the effectiveness score.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}