Elenco (non esaustivo) di pubblicazioni prodotte dai membri del laboratorio
2024
Soprano, Michael; Roitero, Kevin; Barbera, David La; Ceolin, Davide; Spina, Damiano; Demartini, Gianluca; Mizzaro, Stefano
Cognitive Biases in Fact-Checking and Their Countermeasures: A Review Journal Article
In: Information Processing & Management, vol. 61, no. 3, pp. 103672, 2024, ISSN: 0306-4573.
@article{SOPRANO2024103672,
title = {Cognitive Biases in Fact-Checking and Their Countermeasures: A Review},
author = {Soprano, Michael and Roitero, Kevin and La Barbera, David and Ceolin, Davide and Spina, Damiano and Demartini, Gianluca and Mizzaro, Stefano},
url = {https://www.sciencedirect.com/science/article/pii/S0306457324000323},
doi = {10.1016/j.ipm.2024.103672},
issn = {0306-4573},
year = {2024},
date = {2024-02-11},
urldate = {2024-01-01},
journal = {Information Processing \& Management},
volume = {61},
number = {3},
pages = {103672},
abstract = {The increase of the amount of misinformation spread every day online is a huge threat to the society. Organizations and researchers are working to contrast this misinformation plague. In this setting, human assessors are indispensable to correctly identify, assess and/or revise the truthfulness of information items, i.e., to perform the fact-checking activity. Assessors, as humans, are subject to systematic errors that might interfere with their fact-checking activity. Among such errors, cognitive biases are those due to the limits of human cognition. Although biases help to minimize the cost of making mistakes, they skew assessments away from an objective perception of information. Cognitive biases, hence, are particularly frequent and critical, and can cause errors that have a huge potential impact as they propagate not only in the community, but also in the datasets used to train automatic and semi-automatic machine learning models to fight misinformation. In this work, we present a review of the cognitive biases which might occur during the fact-checking process. In more detail, inspired by PRISMA – a methodology used for systematic literature reviews – we manually derive a list of 221 cognitive biases that may affect human assessors. Then, we select the 39 biases that might manifest during the fact-checking process, we group them into categories, and we provide a description. Finally, we present a list of 11 countermeasures that can be adopted by researchers, practitioners, and organizations to limit the effect of the identified cognitive biases on the fact-checking activity.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Baroni, Giulia L.; Rasotto, Laura; Roitero, Kevin; Siraj, Ameer Hamza; Mea, V. Della
Vision Transformers for Breast Cancer Histology Image Classification Proceedings Article
In: Foresti, Gian Luca; Fusiello, Andrea; Hancock, Edwin (Ed.): Image Analysis and Processing - ICIAP 2023 Workshops, pp. 15–26, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-51026-7.
@inproceedings{10.1007/978-3-031-51026-7_2,
title = {Vision Transformers for Breast Cancer Histology Image Classification},
author = {Baroni, Giulia L. and Rasotto, Laura and Roitero, Kevin and Siraj, Ameer Hamza and Della Mea, Vincenzo},
editor = {Foresti, Gian Luca and Fusiello, Andrea and Hancock, Edwin},
doi = {10.1007/978-3-031-51026-7_2},
isbn = {978-3-031-51026-7},
year = {2024},
date = {2024-01-21},
urldate = {2024-01-01},
booktitle = {Image Analysis and Processing - {ICIAP} 2023 Workshops},
pages = {15--26},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {We propose a self-attention Vision Transformer (ViT) model tailored for breast cancer histology image classification. The proposed architecture uses a stack of transformer layers, with each layer consisting of a multi-head self-attention mechanism and a position-wise feed-forward network, and it is trained with different strategies and configurations, including pretraining, resize dimension, data augmentation, patch overlap, and patch size, to investigate their impact on performance on the histology image classification task. Experimental results show that pretraining on ImageNet and using geometric and color data augmentation techniques significantly improve the model's accuracy on the task. Additionally, a patch size of $16 \times 16$ and no patch overlap were found to be optimal for this task. These findings provide valuable insights for the design of future ViT-based models for similar image classification tasks.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Ros, Francesca Da; Gaspero, Luca Di; Roitero, Kevin; Barbera, David La; Mizzaro, Stefano; Mea, Vincenzo Della; Valent, Francesca; Deroma, Laura
Supporting Fair and Efficient Emergency Medical Services in a Large Heterogeneous Region Journal Article
In: Journal of Healthcare Informatics Research, 2024, ISSN: 2509-498X.
@article{DaRos2024,
title = {Supporting Fair and Efficient Emergency Medical Services in a Large Heterogeneous Region},
author = {Da Ros, Francesca and Di Gaspero, Luca and Roitero, Kevin and La Barbera, David and Mizzaro, Stefano and Della Mea, Vincenzo and Valent, Francesca and Deroma, Laura},
url = {https://doi.org/10.1007/s41666-023-00154-1},
doi = {10.1007/s41666-023-00154-1},
issn = {2509-498X},
year = {2024},
date = {2024-01-09},
urldate = {2024-01-09},
journal = {Journal of Healthcare Informatics Research},
abstract = {Emergency Medical Services (EMS) are crucial in delivering timely and effective medical care to patients in need. However, the complex and dynamic nature of operations poses challenges for decision-making processes at strategic, tactical, and operational levels. This paper proposes an action-driven strategy for EMS management, employing a multi-objective optimizer and a simulator to evaluate potential outcomes of decisions. The approach combines historical data with dynamic simulations and multi-objective optimization techniques to inform decision-makers and improve the overall performance of the system. The research focuses on the Friuli Venezia Giulia region in north-eastern Italy. The region encompasses various landscapes and demographic situations that challenge fairness and equity in service access. Similar challenges are faced in other regions with comparable characteristics. The Decision Support System developed in this work accurately models the real-world system and provides valuable feedback and suggestions to EMS professionals, enabling them to make informed decisions and enhance the efficiency and fairness of the system.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2023
Demartini, Gianluca; Roitero, Kevin; Mizzaro, Stefano
Data Bias Management Journal Article
In: Commun. ACM, vol. 67, no. 1, pp. 28–32, 2023, ISSN: 0001-0782.
@article{10.1145/3611641,
title = {Data Bias Management},
author = {Demartini, Gianluca and Roitero, Kevin and Mizzaro, Stefano},
url = {https://doi.org/10.1145/3611641},
doi = {10.1145/3611641},
issn = {0001-0782},
year = {2023},
date = {2023-12-21},
urldate = {2023-12-01},
journal = {Commun. ACM},
volume = {67},
number = {1},
pages = {28--32},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {Envisioning a unique approach toward bias and fairness research.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Soprano, Michael; Roitero, Kevin; Mea, Vincenzo Della; Mizzaro, Stefano
Towards a Conversational-Based Agent for Health Services Proceedings Article
In: Falchi, Fabrizio; Giannotti, Fosca; Monreale, Anna; Boldrini, Chiara; Rinzivillo, Salvatore; Colantonio, Sara (Ed.): Proceedings of the Italia Intelligenza Artificiale - Thematic Workshops co-located with the 3rd CINI National Lab AIIS Conference on Artificial Intelligence, pp. 278–283, CEUR-WS.org, Pisa, Italy, 2023.
@inproceedings{DBLP:conf/italia2023/Soprano23,
title = {Towards a Conversational-Based Agent for Health Services},
author = {Soprano, Michael and Roitero, Kevin and Della Mea, Vincenzo and Mizzaro, Stefano},
editor = {Falchi, Fabrizio and Giannotti, Fosca and Monreale, Anna and Boldrini, Chiara and Rinzivillo, Salvatore and Colantonio, Sara},
url = {https://ceur-ws.org/Vol-3486/96.pdf},
year = {2023},
date = {2023-09-20},
urldate = {2023-01-01},
booktitle = {Proceedings of the Italia Intelligenza Artificiale - Thematic Workshops co-located with the 3rd CINI National Lab AIIS Conference on Artificial Intelligence},
volume = {3486},
pages = {278--283},
publisher = {CEUR-WS.org},
address = {Pisa, Italy},
series = {CEUR Workshop Proceedings},
abstract = {Conversational agents provide new modalities to access and interact with services and applications. Recently, they saw a backfire in their popularity, due to the recent advancements in language models. Such agents have been adopted in various fields such as healthcare and education, yet they received little attention in public administration. We describe as a practical use case a service of the portal that provides citizens of the Italian region of Friuli-Venezia Giulia with services related to their own Electronic Health Records. The service considered allows them to search for the available doctors and pediatricians in the region's municipalities. We rely on the use case described to propose a model for a conversational agent-based access modality. The model proposed allows us to lay the foundation for more advanced chatbot-like implementations which will use also alternative input modalities, such as voice-based communication.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Barbera, David La; Soprano, Michael; Roitero, Kevin; Maddalena, Eddy; Mizzaro, Stefano
Fact-Checking at Scale with Crowdsourcing: Experiments and Lessons Learned Proceedings Article
In: Nardini, Franco Maria; Tonelotto, Nicola; Faggioli, Guglielmo; Ferrara, Antonio (Ed.): Proceedings of the 13th Italian Information Retrieval Workshop, pp. 85–90, CEUR-WS.org, Pisa, Italy, 2023.
@inproceedings{DBLP:conf/iir/BarberaSRMM23,
title = {Fact-Checking at Scale with Crowdsourcing: Experiments and Lessons Learned},
author = {La Barbera, David and Soprano, Michael and Roitero, Kevin and Maddalena, Eddy and Mizzaro, Stefano},
editor = {Nardini, Franco Maria and Tonelotto, Nicola and Faggioli, Guglielmo and Ferrara, Antonio},
url = {https://ceur-ws.org/Vol-3448/paper-18.pdf},
year = {2023},
date = {2023-08-26},
urldate = {2023-08-15},
booktitle = {Proceedings of the 13th Italian Information Retrieval Workshop},
volume = {3448},
pages = {85--90},
publisher = {CEUR-WS.org},
address = {Pisa, Italy},
series = {CEUR Workshop Proceedings},
abstract = {In this paper, we present our journey in exploring the use of crowdsourcing for fact-checking. We discuss our early experiments aimed towards the identification of the best possible setting for misinformation assessment using crowdsourcing. Our results indicate that the crowd can effectively address misinformation at scale, showing some degree of correlation with experts. We also highlight the influence of worker background on the quality of truthfulness assessments.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Barbera, David La; Soprano, Michael; Demartini, Gianluca; Mizzaro, Stefano; Sakai, Tetsuya
How Many Crowd Workers Do I Need? On Statistical Power When Crowdsourcing Relevance Judgments Journal Article
In: ACM Transactions on Information Systems, 2023, ISSN: 1046-8188, (Journal Ranks: Journal Citation Reports (JCR) Q1 (2021), Scimago (SJR) Q1 (2021)).
@article{10.1145/3597201,
title = {How Many Crowd Workers Do I Need? On Statistical Power When Crowdsourcing Relevance Judgments},
author = {Roitero, Kevin and La Barbera, David and Soprano, Michael and Demartini, Gianluca and Mizzaro, Stefano and Sakai, Tetsuya},
doi = {10.1145/3597201},
issn = {1046-8188},
year = {2023},
date = {2023-08-18},
urldate = {2023-01-01},
journal = {ACM Transactions on Information Systems},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {To scale the size of Information Retrieval collections, crowdsourcing has become a common way to collect relevance judgments at scale. Crowdsourcing experiments usually employ 100-10,000 workers, but such a number is often decided in a heuristic way. The downside is that the resulting dataset does not have any guarantee of meeting predefined statistical requirements as, for example, have enough statistical power to be able to distinguish in a statistically significant way between the relevance of two documents. We propose a methodology adapted from literature on sound topic set size design, based on t-test and ANOVA, which aims at guaranteeing the resulting dataset to meet a predefined set of statistical requirements. We validate our approach on several public datasets. Our results show that we can reliably estimate the recommended number of workers needed to achieve statistical power, and that such estimation is dependent on the topic, while the effect of the relevance scale is limited. Furthermore, we found that such estimation is dependent on worker features such as agreement. Finally, we describe a set of practical estimation strategies that can be used to estimate the worker set size, and we also provide results on the estimation of document set sizes.},
note = {Journal Ranks: Journal Citation Reports (JCR) Q1 (2021), Scimago (SJR) Q1 (2021)},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Xie, Haoyu; Maddalena, Eddy; Qarout, Rehab; Checco, Alessandro
The Dark Side of Recruitment in Crowdsourcing: Ethics and Transparency in Micro-Task Marketplaces Journal Article
In: Computer Supported Cooperative Work (CSCW), vol. 32, no. 3, pp. 439-474, 2023, ISSN: 1573-7551.
@article{Xie2023b,
title = {The Dark Side of Recruitment in Crowdsourcing: Ethics and Transparency in Micro-Task Marketplaces},
author = {Xie, Haoyu and Maddalena, Eddy and Qarout, Rehab and Checco, Alessandro},
url = {https://doi.org/10.1007/s10606-023-09464-9},
doi = {10.1007/s10606-023-09464-9},
issn = {1573-7551},
year = {2023},
date = {2023-07-28},
urldate = {2023-09-01},
journal = {Computer Supported Cooperative Work (CSCW)},
volume = {32},
number = {3},
pages = {439--474},
abstract = {Micro-task crowdsourcing marketplaces like Figure Eight (F8) connect a large pool of workers to employers through a single online platform, by aggregating multiple crowdsourcing platforms (channels) under a unique system. This paper investigates the F8 channels' demographic distribution and reward schemes by analysing more than 53k crowdsourcing tasks over four years, collecting survey data and scraping marketplace metadata. We reveal an heterogeneous per-channel demographic distribution, and an opaque channel commission scheme, that varies over time and is not communicated to the employer when launching a task: workers often will receive a smaller payment than expected by the employer. In addition, the impact of channel commission schemes on the relationship between requesters and crowdworkers is explored. These observations uncover important issues on ethics, reliability and transparency of crowdsourced experiment when using this kind of marketplaces, especially for academic research.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Maddalena, Eddy; Ibáñez, Luis-Daniel; Reeves, Neal; Simperl, Elena
Qrowdsmith: Enhancing Paid Microtask Crowdsourcing with Gamification and Furtherance Incentives Journal Article
In: ACM Trans. Intell. Syst. Technol., 2023, ISSN: 2157-6904, (Just Accepted).
@article{10.1145/3604940,
  author    = {Eddy Maddalena and Luis-Daniel Ibáñez and Neal Reeves and Elena Simperl},
  title     = {Qrowdsmith: Enhancing Paid Microtask Crowdsourcing with Gamification and Furtherance Incentives},
  journal   = {ACM Trans. Intell. Syst. Technol.},
  year      = {2023},
  date      = {2023-06-01},
  issn      = {2157-6904},
  doi       = {10.1145/3604940},
  url       = {https://doi.org/10.1145/3604940},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  abstract  = {Microtask crowdsourcing platforms are social intelligence systems in which volunteers, called crowdworkers, complete small, repetitive tasks in return for a small fee. Beyond payments, task requesters are considering non-monetary incentives such as points, badges and other gamified elements to increase performance and improve crowdworker experience. In this paper, we present Qrowdsmith, a platform for gamifying microtask crowdsourcing. To design the system, we explore empirically a range of gamified and financial incentives and analyse their impact on how efficient, effective, and reliable the results are. To maintain participation over time and save costs, we propose furtherance incentives, which are offered to crowdworkers to encourage additional contributions in addition to the fee agreed upfront. In a series of controlled experiments we find that while gamification can work as furtherance incentives, it impacts negatively on crowdworkers performance, both in terms of the quantity and quality of work, as compared to a baseline where they can continue to contribute voluntarily. Gamified incentives are also less effective than paid bonus equivalents. Our results contribute to the understanding of how best to encourage engagement in microtask crowdsourcing activities, and design better crowd intelligence systems.},
  note      = {Just Accepted},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Ceolin, Davide; Roitero, Kevin; Guo, Furong
Predicting Crowd Workers Performance: An Information Quality Case Proceedings Article
In: Garrigós, Irene; Rodríguez, Juan Manuel Murillo; Wimmer, Manuel (Ed.): Web Engineering, pp. 75–90, Springer Nature Switzerland, Cham, 2023, ISBN: 978-3-031-34444-2.
@inproceedings{10.1007/978-3-031-34444-2_6,
title = {Predicting Crowd Workers Performance: An Information Quality Case},
author = {Ceolin, Davide and Roitero, Kevin and Guo, Furong},
editor = {Garrigós, Irene and Murillo Rodríguez, Juan Manuel and Wimmer, Manuel},
doi = {10.1007/978-3-031-34444-2_6},
isbn = {978-3-031-34444-2},
year = {2023},
date = {2023-01-01},
booktitle = {Web Engineering},
pages = {75--90},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {Supervised machine learning tasks require human-labeled data. Crowdsourcing allows scaling up the labeling process, but the quality of the labels obtained can vary. To address this limitation, we propose methods for predicting label quality based on worker trajectories, i.e., on the sequence of documents workers explore during their crowdsourcing tasks. Trajectories represent a lightweight and non-intrusive form of worker behavior signal. We base our analysis on previously collected datasets composed of thousands of assessment data records including information such as workers' trajectories, workers' assessments, and experts' assessments. We model such behavior sequences as embeddings, to facilitate their management. Then, we: (1) use supervised methods to predict worker performance using a given ground truth; (2) perform an unsupervised analysis to provide insight into crowdsourcing quality when no gold standard is available. We test several supervised approaches which all beat the baseline we propose. Also, we identify significant differences between trajectory clusters in terms of assessments and worker performance. The trajectory-based analysis is a promising direction for non-intrusive worker performance evaluation.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Portelli, Beatrice; Serra, Giuseppe; Mea, Vincenzo Della; Mizzaro, Stefano; Cerro, Gianni; Vitelli, Michele; Molinara, Mario
Detection of Wastewater Pollution Through Natural Language Generation With a Low-Cost Sensing Platform Journal Article
In: IEEE Access, vol. 11, pp. 50272–50284, 2023, ISSN: 2169-3536.
@article{10129181,
title = {Detection of Wastewater Pollution Through Natural Language Generation With a Low-Cost Sensing Platform},
author = {Roitero, Kevin and Portelli, Beatrice and Serra, Giuseppe and Della Mea, Vincenzo and Mizzaro, Stefano and Cerro, Gianni and Vitelli, Michele and Molinara, Mario},
doi = {10.1109/ACCESS.2023.3277535},
issn = {2169-3536},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
journal = {IEEE Access},
volume = {11},
pages = {50272--50284},
abstract = {The detection of contaminants in several environments (e.g., air, water, sewage systems) is of paramount importance to protect people and predict possible dangerous circumstances. Most works do this using classical Machine Learning tools that act on the acquired measurement data. This paper introduces two main elements: a low-cost platform to acquire, pre-process, and transmit data to classify contaminants in wastewater; and a novel classification approach to classify contaminants in wastewater, based on deep learning and the transformation of raw sensor data into natural language metadata. The proposed solution presents clear advantages against state-of-the-art systems in terms of higher effectiveness and reasonable efficiency. The main disadvantage of the proposed approach is that it relies on knowing the injection time, i.e., the instant in time when the contaminant is injected into the wastewater. For this reason, the developed system also includes a finite state machine tool able to infer the exact time instant when the substance is injected. The entire system is presented and discussed in detail. Furthermore, several variants of the proposed processing technique are also presented to assess the sensitivity to the number of used samples and the corresponding promptness/computational burden of the system. The lowest accuracy obtained by our technique is 91.4%, which is significantly higher than the 81.0% accuracy reached by the best baseline method.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Amigó, Enrique; Deldjoo, Yashar; Mizzaro, Stefano; Bellogín, Alejandro
A unifying and general account of fairness measurement in recommender systems Journal Article
In: Information Processing & Management, vol. 60, no. 1, pp. 103115, 2023, ISSN: 0306-4573.
@article{AMIGO2023103115,
title = {A unifying and general account of fairness measurement in recommender systems},
author = {Amigó, Enrique and Deldjoo, Yashar and Mizzaro, Stefano and Bellogín, Alejandro},
url = {https://www.sciencedirect.com/science/article/pii/S0306457322002163},
doi = {10.1016/j.ipm.2022.103115},
issn = {0306-4573},
year = {2023},
date = {2023-01-01},
journal = {Information Processing \& Management},
volume = {60},
number = {1},
pages = {103115},
abstract = {Fairness is fundamental to all information access systems, including recommender systems. However, the landscape of fairness definition and measurement is quite scattered with many competing definitions that are partial and often incompatible. There is much work focusing on specific – and different – notions of fairness and there exist dozens of metrics of fairness in the literature, many of them redundant and most of them incompatible. In contrast, to our knowledge, there is no formal framework that covers all possible variants of fairness and allows developers to choose the most appropriate variant depending on the particular scenario. In this paper, we aim to define a general, flexible, and parameterizable framework that covers a whole range of fairness evaluation possibilities. Instead of modeling the metrics based on an abstract definition of fairness, the distinctive feature of this study compared to the current state of the art is that we start from the metrics applied in the literature to obtain a unified model by generalization. The framework is grounded on a general work hypothesis: interpreting the space of users and items as a probabilistic sample space, two fundamental measures in information theory (Kullback–Leibler Divergence and Mutual Information) can capture the majority of possible scenarios for measuring fairness on recommender system outputs. In addition, earlier research on fairness in recommender systems could be viewed as single-sided, trying to optimize some form of equity across either user groups or provider/procurer groups, without considering the user/item space in conjunction, thereby overlooking/disregarding the interplay between user and item groups. Instead, our framework includes the notion of statistical independence between user and item groups. We finally validate our approach experimentally on both synthetic and real data according to a wide range of state-of-the-art recommendation algorithms and real-world data sets, showing that with our framework we can measure fairness in a general, uniform, and meaningful way.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Amigó, Enrique; Gonzalo, Julio; Mizzaro, Stefano
What is My Problem? Identifying Formal Tasks and Metrics in Data Mining on the Basis of Measurement Theory Journal Article
In: IEEE Transactions on Knowledge and Data Engineering, vol. 35, no. 2, pp. 2147–2157, 2023.
@article{9528028,
title = {What is My Problem? Identifying Formal Tasks and Metrics in Data Mining on the Basis of Measurement Theory},
author = {Amigó, Enrique and Gonzalo, Julio and Mizzaro, Stefano},
doi = {10.1109/TKDE.2021.3109823},
year = {2023},
date = {2023-01-01},
journal = {IEEE Transactions on Knowledge and Data Engineering},
volume = {35},
number = {2},
pages = {2147--2157},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2022
Brand, Erik; Roitero, Kevin; Soprano, Michael; Rahimi, Afshin; Demartini, Gianluca
A Neural Model to Jointly Predict and Explain Truthfulness of Statements Journal Article
In: J. Data and Information Quality, 2022, ISSN: 1936-1955, (Just Accepted).
@article{10.1145/3546917,
  author    = {Erik Brand and Kevin Roitero and Michael Soprano and Afshin Rahimi and Gianluca Demartini},
  title     = {A Neural Model to Jointly Predict and Explain Truthfulness of Statements},
  journal   = {J. Data and Information Quality},
  year      = {2022},
  date      = {2022-05-01},
  issn      = {1936-1955},
  doi       = {10.1145/3546917},
  url       = {https://doi.org/10.1145/3546917},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  abstract  = {Automated fact-checking (AFC) systems exist to combat disinformation, however their complexity usually makes them opaque to the end user, making it difficult to foster trust in the system. In this paper, we introduce the E-BART model with the hope of making progress on this front. E-BART is able to provide a veracity prediction for a claim, and jointly generate a human-readable explanation for this decision. We show that E-BART is competitive with the state-of-the-art on the e-FEVER and e-SNLI tasks. In addition, we validate the joint-prediction architecture by showing 1) that generating explanations does not significantly impede the model from performing well in its main task of veracity prediction, and 2) that predicted veracity and explanations are more internally coherent when generated jointly than separately. We also calibrate the E-BART model, allowing the output of the final model be correctly interpreted as the confidence of correctness. Finally, we also conduct and extensive human evaluation on the impact of generated explanations and observe that: explanations increase human ability to spot misinformation and make people more skeptical about claims, and explanations generated by E-BART are competitive with ground truth explanations.},
  note      = {Just Accepted},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Qu, Yunke; Barbera, David La; Roitero, Kevin; Mizzaro, Stefano; Spina, Damiano; Demartini, Gianluca
Combining Human and Machine Confidence in Truthfulness Assessment Journal Article
In: J. Data and Information Quality, 2022, ISSN: 1936-1955, (Just Accepted).
@article{10.1145/3546916,
title = {Combining Human and Machine Confidence in Truthfulness Assessment},
author = {Qu, Yunke and La Barbera, David and Roitero, Kevin and Mizzaro, Stefano and Spina, Damiano and Demartini, Gianluca},
url = {https://doi.org/10.1145/3546916},
doi = {10.1145/3546916},
issn = {1936-1955},
year = {2022},
date = {2022-05-01},
journal = {J. Data and Information Quality},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {Automatically detecting online misinformation at scale is a challenging and interdisciplinary problem. Deciding what is to be considered truthful information is sometimes controversial and difficult also for educated experts. As the scale of the problem increases, human-in-the-loop approaches to truthfulness that combine both the scalability of machine learning (ML) and the accuracy of human contributions have been considered. In this work we look at the potential to automatically combine machine-based systems with human-based systems. The former exploit supervised ML approaches; the latter involve either crowd workers (i.e., human non-experts) or human experts. Since both ML and crowdsourcing approaches can produce a score indicating the level of confidence on their truthfulness judgments (either algorithmic or self-reported, respectively), we address the question of whether it is feasible to make use of such confidence scores to effectively and efficiently combine three approaches: (i) machine-based methods; (ii) crowd workers, and (iii) human experts. The three approaches differ significantly as they range from available, cheap, fast, scalable, but less accurate to scarce, expensive, slow, not scalable, but highly accurate.},
note = {Just Accepted},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Soprano, Michael; Roitero, Kevin; Bona, Francesco Bombassei De; Mizzaro, Stefano
Crowd_Frame: A Simple and Complete Framework to Deploy Complex Crowdsourcing Tasks Off-the-Shelf Proceedings Article
In: Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining, pp. 1605–1608, Association for Computing Machinery, Virtual Event, AZ, USA, 2022, ISBN: 9781450391320.
@inproceedings{conference-paper-wsdm2022,
title = {Crowd\_Frame: A Simple and Complete Framework to Deploy Complex Crowdsourcing Tasks Off-the-Shelf},
author = {Soprano, Michael and Roitero, Kevin and Bombassei De Bona, Francesco and Mizzaro, Stefano},
doi = {10.1145/3488560.3502182},
isbn = {9781450391320},
year = {2022},
date = {2022-01-01},
booktitle = {Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining},
pages = {1605--1608},
publisher = {Association for Computing Machinery},
address = {Virtual Event, AZ, USA},
series = {WSDM '22},
abstract = {Due to their relatively low cost and ability to scale, crowdsourcing based approaches are widely used to collect a large amount of human annotated data. To this aim, multiple crowdsourcing platforms exist, where requesters can upload tasks and workers can carry them out and obtain payment in return. Such platforms share a task design and deploy workflow that is often counter-intuitive and cumbersome. To address this issue, we propose Crowd\_Frame, a simple and complete framework which allows to develop and deploy diverse types of complex crowdsourcing tasks in an easy and customizable way. We show the abilities of the proposed framework and we make it available to researchers and practitioners.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Checco, Alessandro; Mizzaro, Stefano; Demartini, Gianluca
Preferences on a Budget: Prioritizing Document Pairs When Crowdsourcing Relevance Judgments Proceedings Article
In: Proceedings of the ACM Web Conference 2022, pp. 319–327, Association for Computing Machinery, Virtual Event, Lyon, France, 2022, ISBN: 9781450390965.
@inproceedings{10.1145/3485447.3511960,
title = {Preferences on a Budget: Prioritizing Document Pairs When Crowdsourcing Relevance Judgments},
author = {Kevin Roitero and Alessandro Checco and Stefano Mizzaro and Gianluca Demartini},
url = {https://doi.org/10.1145/3485447.3511960},
doi = {10.1145/3485447.3511960},
isbn = {9781450390965},
year = {2022},
date = {2022-01-01},
booktitle = {Proceedings of the ACM Web Conference 2022},
pages = {319--327},
publisher = {Association for Computing Machinery},
address = {Virtual Event, Lyon, France},
series = {WWW '22},
abstract = {In Information Retrieval (IR) evaluation, preference judgments are collected by presenting to the assessors a pair of documents and asking them to select which of the two, if any, is the most relevant. This is an alternative to the classic relevance judgment approach, in which human assessors judge the relevance of a single document on a scale; such an alternative allows to make relative rather than absolute judgments of relevance. While preference judgments are easier for human assessors to perform, the number of possible document pairs to be judged is usually so high that it makes it unfeasible to judge them all. Thus, following a similar idea to pooling strategies for single document relevance judgments where the goal is to sample the most useful documents to be judged, in this work we focus on analyzing alternative ways to sample document pairs to judge, in order to maximize the value of a fixed number of preference judgments that can feasibly be collected. Such value is defined as how well we can evaluate IR systems given a budget, that is, a fixed number of human preference judgments that may be collected. By relying on several datasets featuring relevance judgments gathered by means of experts and crowdsourcing, we experimentally compare alternative strategies to select document pairs and show how different strategies lead to different IR evaluation result quality levels. Our results show that, by using the appropriate procedure, it is possible to achieve good IR evaluation results with a limited number of preference judgments, thus confirming the feasibility of using preference judgments to create IR evaluation collections.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Barbera, David La; Roitero, Kevin; Mackenzie, Joel; Spina, Damiano; Demartini, Gianluca; Mizzaro, Stefano
BUM at CheckThat! 2022: A Composite Deep Learning Approach to Fake News Detection using Evidence Retrieval Proceedings Article
In: Faggioli, Guglielmo; Ferro, Nicola; Hanbury, Allan; Potthast, Martin (Ed.): Working Notes of CLEF 2022 - Conference and Labs of the Evaluation Forum, Bologna, Italy, 2022.
@inproceedings{clef-checkthat:2022:task3:La_Barbera_BUM,
title = {BUM at CheckThat! 2022: A Composite Deep Learning Approach to Fake News Detection using Evidence Retrieval},
author = {David La Barbera and Kevin Roitero and Joel Mackenzie and Damiano Spina and Gianluca Demartini and Stefano Mizzaro},
editor = {Guglielmo Faggioli and Nicola Ferro and Allan Hanbury and Martin Potthast},
year = {2022},
date = {2022-01-01},
booktitle = {Working Notes of CLEF 2022 - Conference and Labs of the Evaluation Forum},
address = {Bologna, Italy},
series = {CLEF~'2022},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Draws, Tim; Barbera, David La; Soprano, Michael; Roitero, Kevin; Ceolin, Davide; Checco, Alessandro; Mizzaro, Stefano
The Effects of Crowd Worker Biases in Fact-Checking Tasks Proceedings Article
In: 2022 ACM Conference on Fairness, Accountability, and Transparency, pp. 2114–2124, Association for Computing Machinery, Seoul, Republic of Korea, 2022, ISBN: 9781450393522.
@inproceedings{10.1145/3531146.3534629,
title = {The Effects of Crowd Worker Biases in Fact-Checking Tasks},
author = {Tim Draws and David La Barbera and Michael Soprano and Kevin Roitero and Davide Ceolin and Alessandro Checco and Stefano Mizzaro},
url = {https://doi.org/10.1145/3531146.3534629},
doi = {10.1145/3531146.3534629},
isbn = {9781450393522},
year = {2022},
date = {2022-01-01},
booktitle = {2022 ACM Conference on Fairness, Accountability, and Transparency},
pages = {2114--2124},
publisher = {Association for Computing Machinery},
address = {Seoul, Republic of Korea},
series = {FAccT '22},
abstract = {Due to the increasing amount of information shared online every day, the need for sound and reliable ways of distinguishing between trustworthy and non-trustworthy information is as present as ever. One technique for performing fact-checking at scale is to employ human intelligence in the form of crowd workers. Although earlier work has suggested that crowd workers can reliably identify misinformation, cognitive biases of crowd workers may reduce the quality of truthfulness judgments in this context. We performed a systematic exploratory analysis of publicly available crowdsourced data to identify a set of potential systematic biases that may occur when crowd workers perform fact-checking tasks. Following this exploratory study, we collected a novel data set of crowdsourced truthfulness judgments to validate our hypotheses. Our findings suggest that workers generally overestimate the truthfulness of statements and that different individual characteristics (i.e., their belief in science) and cognitive biases (i.e., the affect heuristic and overconfidence) can affect their annotations. Interestingly, we find that, depending on the general judgment tendencies of workers, their biases may sometimes lead to more accurate judgments.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Ceschia, Sara; Roitero, Kevin; Demartini, Gianluca; Mizzaro, Stefano; Gaspero, Luca Di; Schaerf, Andrea
Task design in complex crowdsourcing experiments: Item assignment optimization Journal Article
In: Computers & Operations Research, pp. 105995, 2022, ISSN: 0305-0548.
@article{CESCHIA2022105995,
title = {Task design in complex crowdsourcing experiments: Item assignment optimization},
author = {Sara Ceschia and Kevin Roitero and Gianluca Demartini and Stefano Mizzaro and Luca Di Gaspero and Andrea Schaerf},
url = {https://www.sciencedirect.com/science/article/pii/S0305054822002295},
doi = {10.1016/j.cor.2022.105995},
issn = {0305-0548},
year = {2022},
date = {2022-01-01},
journal = {Computers & Operations Research},
pages = {105995},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ceolin, Davide; Primiero, Giuseppe; Soprano, Michael; Wielemaker, Jan
Transparent Assessment of Information Quality of Online Reviews Using Formal Argumentation Theory Journal Article
In: Information Systems, vol. 110, pp. 102107, 2022, ISSN: 0306-4379, (Journal Ranks: Journal Citation Reports (JCR) Q2 (2021), Scimago (SJR) Q1 (2021)).
@article{CEOLIN2022102107,
  title     = {Transparent Assessment of Information Quality of Online Reviews Using Formal Argumentation Theory},
  author    = {Davide Ceolin and Giuseppe Primiero and Michael Soprano and Jan Wielemaker},
  doi       = {10.1016/j.is.2022.102107},
  issn      = {0306-4379},
  year      = {2022},
  date      = {2022-01-01},
  journal   = {Information Systems},
  volume    = {110},
  pages     = {102107},
  abstract  = {Review scores collect users’ opinions in a simple and intuitive manner. However, review scores are also easily manipulable, hence they are often accompanied by explanations. A substantial amount of research has been devoted to ascertaining the quality of reviews, to identify the most useful and authentic scores through explanation analysis. In this paper, we advance the state of the art in review quality analysis. We introduce a rating system to identify review arguments and to define an appropriate weighted semantics through formal argumentation theory. We introduce an algorithm to construct a corresponding graph, based on a selection of weighted arguments, their semantic distance, and the supported ratings. We also provide an algorithm to identify the model of such an argumentation graph, maximizing the overall weight of the admitted nodes and edges. We evaluate these contributions on the Amazon review dataset by McAuley et al. (2015), by comparing the results of our argumentation assessment with the upvotes received by the reviews. Also, we deepen the evaluation by crowdsourcing a multidimensional assessment of reviews and comparing it to the argumentation assessment. Lastly, we perform a user study to evaluate the explainability of our method, i.e., to test whether the automated method we use to assess reviews is understandable by humans. Our method achieves two goals: (1) it identifies reviews that are considered useful, comprehensible, and complete by online users, and does so in an unsupervised manner, and (2) it provides an explanation of quality assessments.},
  note      = {Journal Ranks: Journal Citation Reports (JCR) Q2 (2021), Scimago (SJR) Q1 (2021)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Amigó, Enrique; Mizzaro, Stefano; Spina, Damiano
Ranking Interruptus: When Truncated Rankings Are Better and How to Measure That Proceedings Article
In: Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 588–598, Association for Computing Machinery, New York, NY, USA, 2022, ISBN: 9781450387323.
@inproceedings{10.1145/3477495.3532051,
title = {Ranking Interruptus: When Truncated Rankings Are Better and How to Measure That},
author = {Enrique Amigó and Stefano Mizzaro and Damiano Spina},
url = {https://doi.org/10.1145/3477495.3532051},
doi = {10.1145/3477495.3532051},
isbn = {9781450387323},
year = {2022},
date = {2022-01-01},
booktitle = {Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval},
pages = {588--598},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
series = {SIGIR '22},
abstract = {Most of information retrieval effectiveness evaluation metrics assume that systems appending irrelevant documents at the bottom of the ranking are as effective as (or not worse than) systems that have a stopping criteria to 'truncate' the ranking at the right position to avoid retrieving those irrelevant documents at the end. It can be argued, however, that such truncated rankings are more useful to the end user. It is thus important to understand how to measure retrieval effectiveness in this scenario. In this paper we provide both theoretical and experimental contributions. We first define formal properties to analyze how effectiveness metrics behave when evaluating truncated rankings. Our theoretical analysis shows that de-facto standard metrics do not satisfy desirable properties to evaluate truncated rankings: only Observational Information Effectiveness (OIE) – a metric based on Shannon's information theory – satisfies them all. We then perform experiments to compare several metrics on nine TREC datasets. According to our experimental results, the most appropriate metrics for truncated rankings are OIE and a novel extension of Rank-Biased Precision that adds a user effort factor penalizing the retrieval of irrelevant documents.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2021
Brand, Erik; Roitero, Kevin; Soprano, Michael; Demartini, Gianluca
E-BART: Jointly Predicting and Explaining Truthfulness Proceedings Article
In: Augenstein, Isabelle; Papotti, Paolo; Wright, Dustin (Ed.): Proceedings of the 2021 Truth and Trust Online Conference (TTO 2021), Virtual, October 7-8, 2021, pp. 18–27, Hacks Hackers, 2021.
@inproceedings{conference-paper-tto-2021,
title = {E-BART: Jointly Predicting and Explaining Truthfulness},
author = {Erik Brand and Kevin Roitero and Michael Soprano and Gianluca Demartini},
editor = {Isabelle Augenstein and Paolo Papotti and Dustin Wright},
url = {https://truthandtrustonline.com/wp-content/uploads/2021/10/TTO2021_paper_16-1.pdf},
year = {2021},
date = {2021-01-01},
booktitle = {Proceedings of the 2021 Truth and Trust Online Conference (TTO 2021), Virtual, October 7-8, 2021},
pages = {18--27},
publisher = {Hacks Hackers},
abstract = {Automated fact-checking (AFC) systems exist to combat disinformation, however their complexity makes them opaque to the end user, making it difficult to foster trust. In this paper, we introduce the E-BART model with the hope of making progress on this front. E-BART is able to provide a veracity prediction for a claim, and jointly generate a human-readable explanation for this decision. We show that E-BART is competitive with the state-of-the-art on the e-FEVER and e-SNLI tasks. In addition, we validate the joint-prediction architecture by showing 1) that generating explanations does not significantly impede the model from performing well in its main task of veracity prediction, and 2) that predicted veracity and explanations are more internally coherent when generated jointly than separately. Finally, we also conduct human evaluations on the impact of generated explanations and observe that explanations increase human ability to spot misinformation and make people more skeptical about claims.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Soprano, Michael; Portelli, Beatrice; Luise, Massimiliano De; Spina, Damiano; Mea, Vincenzo Della; Serra, Giuseppe; Mizzaro, Stefano; Demartini, Gianluca
Can The Crowd Judge Truthfulness? A Longitudinal Study on Recent Misinformation About COVID-19 Journal Article
In: Personal and Ubiquitous Computing, 2021, ISSN: 1617-4917.
@article{journal-paper-puc-2021,
  title     = {Can The Crowd Judge Truthfulness? A Longitudinal Study on Recent Misinformation About COVID-19},
  author    = {Kevin Roitero and Michael Soprano and Beatrice Portelli and Massimiliano De Luise and Damiano Spina and Vincenzo Della Mea and Giuseppe Serra and Stefano Mizzaro and Gianluca Demartini},
  url       = {https://doi.org/10.1007/s00779-021-01604-6},
  doi       = {10.1007/s00779-021-01604-6},
  issn      = {1617-4917},
  year      = {2021},
  date      = {2021-01-01},
  journal   = {Personal and Ubiquitous Computing},
  abstract  = {Recently, the misinformation problem has been addressed with a crowdsourcing-based approach: to assess the truthfulness of a statement, instead of relying on a few experts, a crowd of non-expert is exploited. We study whether crowdsourcing is an effective and reliable method to assess truthfulness during a pandemic, targeting statements related to COVID-19, thus addressing (mis)information that is both related to a sensitive and personal issue and very recent as compared to when the judgment is done. In our experiments, crowd workers are asked to assess the truthfulness of statements, and to provide evidence for the assessments. Besides showing that the crowd is able to accurately judge the truthfulness of the statements, we report results on workers' behavior, agreement among workers, effect of aggregation functions, of scales transformations, and of workers background and bias. We perform a longitudinal study by re-launching the task multiple times with both novice and experienced workers, deriving important insights on how the behavior and quality change over time. Our results show that workers are able to detect and objectively categorize online (mis)information related to COVID-19; both crowdsourced and expert judgments can be transformed and aggregated to improve quality; worker background and other signals (e.g., source of information, behavior) impact the quality of the data. The longitudinal study demonstrates that the time-span has a major effect on the quality of the judgments, for both novice and experienced workers. Finally, we provide an extensive failure analysis of the statements misjudged by the crowd-workers.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Soprano, Michael; Roitero, Kevin; Barbera, David La; Ceolin, Davide; Spina, Damiano; Mizzaro, Stefano; Demartini, Gianluca
The Many Dimensions of Truthfulness: Crowdsourcing Misinformation Assessments on a Multidimensional Scale Journal Article
In: Information Processing & Management, vol. 58, no 6, pp. 102710, 2021, ISSN: 0306-4573.
@article{journal-paper-ipm-2021,
title = {The Many Dimensions of Truthfulness: Crowdsourcing Misinformation Assessments on a Multidimensional Scale},
author = {Michael Soprano and Kevin Roitero and David La Barbera and Davide Ceolin and Damiano Spina and Stefano Mizzaro and Gianluca Demartini},
url = {https://www.sciencedirect.com/science/article/pii/S0306457321001941},
doi = {10.1016/j.ipm.2021.102710},
issn = {0306-4573},
year = {2021},
date = {2021-01-01},
journal = {Information Processing & Management},
volume = {58},
number = {6},
pages = {102710},
abstract = {Recent work has demonstrated the viability of using crowdsourcing as a tool for evaluating the truthfulness of public statements. Under certain conditions such as: (1) having a balanced set of workers with different backgrounds and cognitive abilities; (2) using an adequate set of mechanisms to control the quality of the collected data; and (3) using a coarse grained assessment scale, the crowd can provide reliable identification of fake news. However, fake news are a subtle matter: statements can be just biased (“cherrypicked”), imprecise, wrong, etc. and the unidimensional truth scale used in existing work cannot account for such differences. In this paper we propose a multidimensional notion of truthfulness and we ask the crowd workers to assess seven different dimensions of truthfulness selected based on existing literature: Correctness, Neutrality, Comprehensibility, Precision, Completeness, Speaker’s Trustworthiness, and Informativeness. We deploy a set of quality control mechanisms to ensure that the thousands of assessments collected on 180 publicly available fact-checked statements distributed over two datasets are of adequate quality, including a custom search engine used by the crowd workers to find web pages supporting their truthfulness assessments. A comprehensive analysis of crowdsourced judgments shows that: (1) the crowdsourced assessments are reliable when compared to an expert-provided gold standard; (2) the proposed dimensions of truthfulness capture independent pieces of information; (3) the crowdsourcing task can be easily learned by the workers; and (4) the resulting assessments provide a useful basis for a more complete estimation of statement truthfulness.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ceolin, Davide; Primiero, Giuseppe; Wielemaker, Jan; Soprano, Michael
Assessing the Quality of Online Reviews Using Formal Argumentation Theory Proceedings Article
In: Brambilla, Marco; Chbeir, Richard; Frasincar, Flavius; Manolescu, Ioana (Ed.): Web Engineering, pp. 71–87, Springer International Publishing, Cham, 2021, ISBN: 978-3-030-74296-6.
@inproceedings{10.1007/978-3-030-74296-6_6,
  title     = {Assessing the Quality of Online Reviews Using Formal Argumentation Theory},
  author    = {Davide Ceolin and Giuseppe Primiero and Jan Wielemaker and Michael Soprano},
  editor    = {Marco Brambilla and Richard Chbeir and Flavius Frasincar and Ioana Manolescu},
  doi       = {10.1007/978-3-030-74296-6_6},
  isbn      = {978-3-030-74296-6},
  year      = {2021},
  date      = {2021-01-01},
  booktitle = {Web Engineering},
  pages     = {71--87},
  publisher = {Springer International Publishing},
  address   = {Cham},
  abstract  = {Review scores collect users' opinions in a simple and intuitive manner. However, review scores are also easily manipulable, hence they are often accompanied by explanations. A substantial amount of research has been devoted to ascertaining the quality of reviews, to identify the most useful and authentic scores through explanation analysis. In this paper, we advance the state of the art in review quality analysis. We introduce a rating system to identify review arguments and to define an appropriate weighted semantics through formal argumentation theory. We introduce an algorithm to construct a corresponding graph, based on a selection of weighted arguments, their semantic similarity, and the supported ratings. We provide an algorithm to identify the model of such an argumentation graph, maximizing the overall weight of the admitted nodes and edges. We evaluate these contributions on the Amazon review dataset by McAuley et al. [15], by comparing the results of our argumentation assessment with the upvotes received by the reviews. Also, we deepen the evaluation by crowdsourcing a multidimensional assessment of reviews and comparing it to the argumentation assessment. Lastly, we perform a user study to evaluate the explainability of our method. Our method achieves two goals: (1) it identifies reviews that are considered useful, comprehensible, truthful by online users and does so in an unsupervised manner, and (2) it provides an explanation of quality assessments.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Qu, Yunke; Roitero, Kevin; Mizzaro, Stefano; Spina, Damiano; Demartini, Gianluca
Human-in-the-Loop Systems for Truthfulness: A Study of Human and Machine Confidence Proceedings Article
In: Augenstein, Isabelle; Papotti, Paolo; Wright, Dustin (Ed.): Proceedings of the 2021 Truth and Trust Online Conference (TTO 2021), Virtual, October 7-8, 2021, pp. 40–49, Hacks Hackers, 2021.
@inproceedings{DBLP:conf/tto/QuRMSD21,
title = {Human-in-the-Loop Systems for Truthfulness: A Study of Human and Machine Confidence},
author = {Yunke Qu and Kevin Roitero and Stefano Mizzaro and Damiano Spina and Gianluca Demartini},
editor = {Isabelle Augenstein and Paolo Papotti and Dustin Wright},
url = {https://truthandtrustonline.com/wp-content/uploads/2021/10/TTO2021_paper_29.pdf},
year = {2021},
date = {2021-01-01},
booktitle = {Proceedings of the 2021 Truth and Trust Online Conference (TTO 2021), Virtual, October 7-8, 2021},
pages = {40--49},
publisher = {Hacks Hackers},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Portelli, Beatrice; Popescu, Mihai Horia; Mea, Vincenzo Della
DiLBERT: Cheap Embeddings for Disease Related Medical NLP Journal Article
In: IEEE Access, vol. 9, pp. 159714-159723, 2021.
@article{9628010,
title = {DiLBERT: Cheap Embeddings for Disease Related Medical NLP},
author = {Kevin Roitero and Beatrice Portelli and Mihai Horia Popescu and Vincenzo Della Mea},
doi = {10.1109/ACCESS.2021.3131386},
year = {2021},
date = {2021-01-01},
journal = {IEEE Access},
volume = {9},
pages = {159714--159723},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Demartini, Gianluca; Roitero, Kevin; Mizzaro, Stefano
Managing Bias in Human-Annotated Data: Moving Beyond Bias Removal Journal Article
In: CoRR, vol. abs/2110.13504, 2021.
@article{DBLP:journals/corr/abs-2110-13504,
title = {Managing Bias in Human-Annotated Data: Moving Beyond Bias Removal},
author = {Gianluca Demartini and Kevin Roitero and Stefano Mizzaro},
url = {https://arxiv.org/abs/2110.13504},
eprint = {2110.13504},
eprinttype = {arXiv},
year = {2021},
date = {2021-01-01},
journal = {CoRR},
volume = {abs/2110.13504},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Conde-Sousa, Eduardo; Vale, João; Feng, Ming; Xu, Kele; Wang, Yin; Mea, Vincenzo Della; Barbera, David La; Montahaei, Ehsan; Baghshah, Mahdieh Soleymani; Turzynski, Andreas; Gildenblat, Jacob; Klaiman, Eldad; Hong, Yiyu; Aresta, Guilherme; Araújo, Teresa; Aguiar, Paulo; Eloy, Catarina; Polónia, António
HEROHE Challenge: assessing HER2 status in breast cancer without immunohistochemistry or in situ hybridization Miscellaneous
2021.
@misc{https://doi.org/10.48550/arxiv.2111.04738,
title = {HEROHE Challenge: assessing HER2 status in breast cancer without immunohistochemistry or in situ hybridization},
author = {Eduardo Conde-Sousa and João Vale and Ming Feng and Kele Xu and Yin Wang and Vincenzo Della Mea and David La Barbera and Ehsan Montahaei and Mahdieh Soleymani Baghshah and Andreas Turzynski and Jacob Gildenblat and Eldad Klaiman and Yiyu Hong and Guilherme Aresta and Teresa Araújo and Paulo Aguiar and Catarina Eloy and António Polónia},
url = {https://arxiv.org/abs/2111.04738},
doi = {10.48550/arXiv.2111.04738},
eprint = {2111.04738},
eprinttype = {arXiv},
year = {2021},
date = {2021-01-01},
publisher = {arXiv},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Barbera, David La; Roitero, Kevin; Mizzaro, Stefano; Mea, Vincenzo Della; Valent, Francesca
A Software Simulator for Optimizing Ambulance Location and Response Time: A Preliminary Report Proceedings Article
In: 2021 IEEE International Conference on Digital Health (ICDH), pp. 209-211, 2021.
@inproceedings{9581242,
title = {A Software Simulator for Optimizing Ambulance Location and Response Time: A Preliminary Report},
author = {David La Barbera and Kevin Roitero and Stefano Mizzaro and Vincenzo Della Mea and Francesca Valent},
doi = {10.1109/ICDH52753.2021.00037},
year = {2021},
date = {2021-01-01},
booktitle = {2021 IEEE International Conference on Digital Health (ICDH)},
pages = {209--211},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2020
Barbera, David La; Polónia, António; Roitero, Kevin; Conde-Sousa, Eduardo; Mea, Vincenzo Della
Detection of HER2 from Haematoxylin-Eosin Slides Through a Cascade of Deep Learning Classifiers via Multi-Instance Learning Journal Article
In: Journal of Imaging, vol. 6, no 9, 2020, ISSN: 2313-433X.
@article{labarberaher2,
  title     = {Detection of HER2 from Haematoxylin-Eosin Slides Through a Cascade of Deep Learning Classifiers via Multi-Instance Learning},
  author    = {David La Barbera and António Polónia and Kevin Roitero and Eduardo Conde-Sousa and Vincenzo Della Mea},
  doi       = {10.3390/jimaging6090082},
  issn      = {2313-433X},
  year      = {2020},
  date      = {2020-08-23},
  urldate   = {2020-08-23},
  journal   = {Journal of Imaging},
  volume    = {6},
  number    = {9},
  abstract  = {Breast cancer is the most frequently diagnosed cancer in woman. The correct identification of the HER2 receptor is a matter of major importance when dealing with breast cancer: an over-expression of HER2 is associated with aggressive clinical behaviour; moreover, HER2 targeted therapy results in a significant improvement in the overall survival rate. In this work, we employ a pipeline based on a cascade of deep neural network classifiers and multi-instance learning to detect the presence of HER2 from Haematoxylin-Eosin slides, which partly mimics the pathologist’s behaviour by first recognizing cancer and then evaluating HER2. Our results show that the proposed system presents a good overall effectiveness. Furthermore, the system design is prone to further improvements that can be easily deployed in order to increase the effectiveness score.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Roitero, Kevin; Soprano, Michael; Fan, Shaoyang; Spina, Damiano; Mizzaro, Stefano; Demartini, Gianluca
Can The Crowd Identify Misinformation Objectively? The Effects of Judgment Scale and Assessor's Background Proceedings Article
In: Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 439–448, Association for Computing Machinery, Virtual Event, China, 2020, ISBN: 9781450380164.
@inproceedings{10.1145/3397271.3401112,
title = {Can The Crowd Identify Misinformation Objectively? The Effects of Judgment Scale and Assessor's Background},
author = {Kevin Roitero and Michael Soprano and Shaoyang Fan and Damiano Spina and Stefano Mizzaro and Gianluca Demartini},
url = {https://doi.org/10.1145/3397271.3401112},
doi = {10.1145/3397271.3401112},
isbn = {9781450380164},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval},
pages = {439--448},
publisher = {Association for Computing Machinery},
address = {Virtual Event, China},
series = {SIGIR '20},
abstract = {Truthfulness judgments are a fundamental step in the process of fighting misinformation, as they are crucial to train and evaluate classifiers that automatically distinguish true and false statements. Usually such judgments are made by experts, like journalists for political statements or medical doctors for medical statements. In this paper, we follow a different approach and rely on (non-expert) crowd workers. This of course leads to the following research question: Can crowdsourcing be reliably used to assess the truthfulness of information and to create large-scale labeled collections for information credibility systems? To address this issue, we present the results of an extensive study based on crowdsourcing: we collect thousands of truthfulness assessments over two datasets, and we compare expert judgments with crowd judgments, expressed on scales with various granularity levels. We also measure the political bias and the cognitive background of the workers, and quantify their effect on the reliability of the data provided by the crowd.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Culpepper, J. Shane; Sanderson, Mark; Scholer, Falk; Mizzaro, Stefano
Fewer topics? A million topics? Both?! On topics subsets in test collections Journal Article
In: Inf. Retr. J., vol. 23, no 1, pp. 49–85, 2020.
@article{DBLP:journals/ir/RoiteroCSSM20,
title = {Fewer topics? A million topics? Both?! On topics subsets in test collections},
author = {Kevin Roitero and J. Shane Culpepper and Mark Sanderson and Falk Scholer and Stefano Mizzaro},
url = {https://doi.org/10.1007/s10791-019-09357-w},
doi = {10.1007/s10791-019-09357-w},
year = {2020},
date = {2020-01-01},
journal = {Inf. Retr. J.},
volume = {23},
number = {1},
pages = {49--85},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Han, Lei; Maddalena, Eddy; Checco, Alessandro; Sarasua, Cristina; Gadiraju, Ujwal; Roitero, Kevin; Demartini, Gianluca
Crowd Worker Strategies in Relevance Judgment Tasks Proceedings Article
In: Proceedings of the 13th International Conference on Web Search and Data Mining, pp. 241–249, Association for Computing Machinery, Houston, TX, USA, 2020, ISBN: 9781450368223.
@inproceedings{10.1145/3336191.3371857,
title = {Crowd Worker Strategies in Relevance Judgment Tasks},
author = {Lei Han and Eddy Maddalena and Alessandro Checco and Cristina Sarasua and Ujwal Gadiraju and Kevin Roitero and Gianluca Demartini},
url = {https://doi.org/10.1145/3336191.3371857},
doi = {10.1145/3336191.3371857},
isbn = {9781450368223},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 13th International Conference on Web Search and Data Mining},
pages = {241--249},
publisher = {Association for Computing Machinery},
address = {Houston, TX, USA},
series = {WSDM '20},
abstract = {Crowdsourcing is a popular technique to collect large amounts of human-generated labels, such as relevance judgments used to create information retrieval (IR) evaluation collections. Previous research has shown how collecting high quality labels from a crowdsourcing platform can be challenging. Existing quality assurance techniques focus on answer aggregation or on the use of gold questions where ground-truth data allows to check for the quality of the responses.In this paper, we present qualitative and quantitative results, revealing how different crowd workers adopt different work strategies to complete relevance judgment tasks efficiently and their consequent impact on quality. We delve into the techniques and tools that highly experienced crowd workers use to be more efficient in completing crowdsourcing micro-tasks. To this end, we use both qualitative results from worker interviews and surveys, as well as the results of a data-driven study of behavioral log data (i.e., clicks, keystrokes and keyboard shortcuts) collected from crowd workers performing relevance judgment tasks. Our results highlight the presence of frequently used shortcut patterns that can speed-up task completion, thus increasing the hourly wage of efficient workers. We observe how crowd work experiences result in different types of working strategies, productivity levels, quality and diversity of the crowdsourced judgments.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Brunello, Andrea; Serra, Giuseppe; Mizzaro, Stefano
Effectiveness evaluation without human relevance judgments: A systematic analysis of existing methods and of their combinations Journal Article
In: Information Processing & Management, vol. 57, no 2, pp. 102149, 2020, ISSN: 0306-4573.
@article{ROITERO2020102149,
title = {Effectiveness evaluation without human relevance judgments: A systematic analysis of existing methods and of their combinations},
author = {Kevin Roitero and Andrea Brunello and Giuseppe Serra and Stefano Mizzaro},
url = {http://www.sciencedirect.com/science/article/pii/S030645731930192X},
doi = {10.1016/j.ipm.2019.102149},
issn = {0306-4573},
year = {2020},
date = {2020-01-01},
journal = {Information Processing \& Management},
volume = {57},
number = {2},
pages = {102149},
abstract = {In test collection based evaluation of retrieval effectiveness, it has been suggested to completely avoid using human relevance judgments. Although several methods have been proposed, their accuracy is still limited. In this paper we present two overall contributions. First, we provide a systematic comparison of all the most widely adopted previous approaches on a large set of 14 TREC collections. We aim at analyzing the methods in a homogeneous and complete way, in terms of the accuracy measures used as well as in terms of the datasets selected, showing that considerably different results may be achieved considering different methods, datasets, and measures. Second, we study the combination of such methods, which, to the best of our knowledge, has not been investigated so far. Our experimental results show that simple combination strategies based on data fusion techniques are usually not effective and even harmful. However, some more sophisticated solutions, based on machine learning, are indeed effective and often outperform all individual methods. Moreover, they are more stable, as they show a smaller variation across datasets. Our results have the practical implication that, when trying to automatically evaluate retrieval effectiveness, researchers should not use a single method, but a (machine-learning based) combination of them.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Roitero, Kevin; Soprano, Michael; Portelli, Beatrice; Spina, Damiano; Mea, Vincenzo Della; Serra, Giuseppe; Mizzaro, Stefano; Demartini, Gianluca
The COVID-19 Infodemic: Can the Crowd Judge Recent Misinformation Objectively? Proceedings Article
In: Proceedings of the 29th ACM International Conference on Information and Knowledge Management (CIKM2020). Galway, Ireland (Online). October 19-23, 2020. Conference Rank: GGS A+, Core A, pp. 1305–1314, Association for Computing Machinery, Virtual Event, Ireland, 2020, ISBN: 9781450368599.
@inproceedings{conference-paper-cikm2020,
title = {The {COVID-19} Infodemic: Can the Crowd Judge Recent Misinformation Objectively?},
author = {Kevin Roitero and Michael Soprano and Beatrice Portelli and Damiano Spina and Vincenzo Della Mea and Giuseppe Serra and Stefano Mizzaro and Gianluca Demartini},
url = {https://doi.org/10.1145/3340531.3412048},
doi = {10.1145/3340531.3412048},
isbn = {9781450368599},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 29th ACM International Conference on Information and Knowledge Management (CIKM2020)},
note = {Galway, Ireland (Online). October 19-23, 2020. Conference Rank: GGS A+, Core A},
pages = {1305--1314},
publisher = {Association for Computing Machinery},
address = {Virtual Event, Ireland},
series = {CIKM '20},
abstract = {Misinformation is an ever increasing problem that is difficult to solve for the research community and has a negative impact on the society at large. Very recently, the problem has been addressed with a crowdsourcing-based approach to scale up labeling efforts: to assess the truthfulness of a statement, instead of relying on a few experts, a crowd of (non-expert) judges is exploited. We follow the same approach to study whether crowdsourcing is an effective and reliable method to assess statements truthfulness during a pandemic. We specifically target statements related to the COVID-19 health emergency, that is still ongoing at the time of the study and has arguably caused an increase of the amount of misinformation that is spreading online (a phenomenon for which the term "infodemic" has been used). By doing so, we are able to address (mis)information that is both related to a sensitive and personal issue like health and very recent as compared to when the judgment is done: two issues that have not been analyzed in related work.In our experiment, crowd workers are asked to assess the truthfulness of statements, as well as to provide evidence for the assessments as a URL and a text justification. Besides showing that the crowd is able to accurately judge the truthfulness of the statements, we also report results on many different aspects, including: agreement among workers, the effect of different aggregation functions, of scales transformations, and of workers background / bias. We also analyze workers behavior, in terms of queries submitted, URLs found / selected, text justifications, and other behavioral data like clicks and mouse actions collected by means of an ad hoc logger.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roitero, Kevin; Soprano, Michael; Fan, Shaoyang; Spina, Damiano; Mizzaro, Stefano; Demartini, Gianluca
Can The Crowd Identify Misinformation Objectively? The Effects of Judgment Scale and Assessor's Background Proceedings Article
In: Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2020). Xi’an, China (Online). July 25-30, 2020. Conference Rank: GGS A++, Core A*, pp. 439–448, Association for Computing Machinery, Virtual Event, China, 2020, ISBN: 9781450380164.
@inproceedings{conference-paper-sigir2020,
title = {Can The Crowd Identify Misinformation Objectively? The Effects of Judgment Scale and Assessor's Background},
author = {Kevin Roitero and Michael Soprano and Shaoyang Fan and Damiano Spina and Stefano Mizzaro and Gianluca Demartini},
url = {https://doi.org/10.1145/3397271.3401112},
doi = {10.1145/3397271.3401112},
isbn = {9781450380164},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2020)},
note = {Xi’an, China (Online). July 25-30, 2020. Conference Rank: GGS A++, Core A*},
pages = {439--448},
publisher = {Association for Computing Machinery},
address = {Virtual Event, China},
series = {SIGIR '20},
abstract = {Truthfulness judgments are a fundamental step in the process of fighting misinformation, as they are crucial to train and evaluate classifiers that automatically distinguish true and false statements. Usually such judgments are made by experts, like journalists for political statements or medical doctors for medical statements. In this paper, we follow a different approach and rely on (non-expert) crowd workers. This of course leads to the following research question: Can crowdsourcing be reliably used to assess the truthfulness of information and to create large-scale labeled collections for information credibility systems? To address this issue, we present the results of an extensive study based on crowdsourcing: we collect thousands of truthfulness assessments over two datasets, and we compare expert judgments with crowd judgments, expressed on scales with various granularity levels. We also measure the political bias and the cognitive background of the workers, and quantify their effect on the reliability of the data provided by the crowd.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Barbera, David La; Roitero, Kevin; Demartini, Gianluca; Mizzaro, Stefano; Spina, Damiano
Crowdsourcing Truthfulness: The Impact of Judgment Scale and Assessor Bias Proceedings Article
In: Jose, Joemon M.; Yilmaz, Emine; Magalhães, João; Castells, Pablo; Ferro, Nicola; Silva, Mário J.; Martins, Flávio (Ed.): Advances in Information Retrieval - 42nd European Conference on IR Research, ECIR 2020, Lisbon, Portugal, April 14-17, 2020, Proceedings, Part II, pp. 207–214, Springer, 2020.
@inproceedings{DBLP:conf/ecir/BarberaRDMS20,
  title     = {Crowdsourcing Truthfulness: The Impact of Judgment Scale and Assessor Bias},
  author    = {David La Barbera and Kevin Roitero and Gianluca Demartini and Stefano Mizzaro and Damiano Spina},
  editor    = {Joemon M. Jose and Emine Yilmaz and João Magalhães and Pablo Castells and Nicola Ferro and Mário J. Silva and Flávio Martins},
  booktitle = {Advances in Information Retrieval - 42nd European Conference on IR Research, ECIR 2020, Lisbon, Portugal, April 14-17, 2020, Proceedings, Part II},
  series    = {Lecture Notes in Computer Science},
  volume    = {12036},
  pages     = {207--214},
  publisher = {Springer},
  url       = {https://doi.org/10.1007/978-3-030-45442-5_26},
  doi       = {10.1007/978-3-030-45442-5_26},
  year      = {2020},
  date      = {2020-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Demartini, Gianluca; Mizzaro, Stefano; Spina, Damiano
Human-in-the-loop Artificial Intelligence for Fighting Online Misinformation: Challenges and Opportunities Journal Article
In: IEEE Data Eng. Bull., vol. 43, no 3, pp. 65–74, 2020.
@article{DBLP:journals/debu/DemartiniMS20,
  title     = {Human-in-the-loop Artificial Intelligence for Fighting Online Misinformation: Challenges and Opportunities},
  author    = {Gianluca Demartini and Stefano Mizzaro and Damiano Spina},
  journal   = {IEEE Data Eng. Bull.},
  volume    = {43},
  number    = {3},
  pages     = {65--74},
  url       = {http://sites.computer.org/debull/A20sept/p65.pdf},
  year      = {2020},
  date      = {2020-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Barbera, David La; Polónia, António; Roitero, Kevin; Conde-Sousa, Eduardo; Mea, Vincenzo Della
Detection of HER2 from Haematoxylin-Eosin Slides Through a Cascade of Deep Learning Classifiers via Multi-Instance Learning Journal Article
In: Journal of Imaging, vol. 6, no 9, 2020, ISSN: 2313-433X.
@article{jimaging6090082,
title = {Detection of {HER2} from {Haematoxylin-Eosin} Slides Through a Cascade of Deep Learning Classifiers via Multi-Instance Learning},
author = {David La Barbera and António Polónia and Kevin Roitero and Eduardo Conde-Sousa and Vincenzo Della Mea},
url = {https://www.mdpi.com/2313-433X/6/9/82},
doi = {10.3390/jimaging6090082},
issn = {2313-433X},
year = {2020},
date = {2020-01-01},
journal = {Journal of Imaging},
volume = {6},
number = {9},
pages = {82},
abstract = {Breast cancer is the most frequently diagnosed cancer in woman. The correct identification of the HER2 receptor is a matter of major importance when dealing with breast cancer: an over-expression of HER2 is associated with aggressive clinical behaviour; moreover, HER2 targeted therapy results in a significant improvement in the overall survival rate. In this work, we employ a pipeline based on a cascade of deep neural network classifiers and multi-instance learning to detect the presence of HER2 from Haematoxylin-Eosin slides, which partly mimics the pathologist’s behaviour by first recognizing cancer and then evaluating HER2. Our results show that the proposed system presents a good overall effectiveness. Furthermore, the system design is prone to further improvements that can be easily deployed in order to increase the effectiveness score.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Roitero, Kevin; Carterette, Ben; Mehrotra, Rishabh; Lalmas, Mounia
Leveraging Behavioral Heterogeneity Across Markets for Cross-Market Training of Recommender Systems Book Chapter
In: Companion Proceedings of the Web Conference 2020, pp. 694–702, Association for Computing Machinery, New York, NY, USA, 2020, ISBN: 9781450370240.
@inbook{10.1145/3366424.3384362,
title = {Leveraging Behavioral Heterogeneity Across Markets for Cross-Market Training of Recommender Systems},
author = {Kevin Roitero and Ben Carterette and Rishabh Mehrotra and Mounia Lalmas},
url = {https://doi.org/10.1145/3366424.3384362},
doi = {10.1145/3366424.3384362},
isbn = {9781450370240},
year = {2020},
date = {2020-01-01},
booktitle = {Companion Proceedings of the Web Conference 2020},
pages = {694--702},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {Modern recommender systems are optimised to deliver personalised recommendations to millions of users spread across different geographic regions exhibiting various forms of heterogeneity, including behavioural-, content- and trend specific heterogeneity. System designers often face the challenge of deploying either a single global model across all markets, or developing custom models for different markets. In this work, we focus on the specific case of music recommendation across 21 different markets, and consider the trade-off between developing global model versus market specific models. We begin by investigating behavioural differences across users of different markets, and motivate the need for considering market as an important factor when training models. We propose five different training styles, covering the entire spectrum of models: from a single global model to individual market specific models, and in the process, propose ways to identify and leverage users abroad, and data from similar markets. Based on a large scale experimentation with data for 100M users across 21 different markets, we present insights which highlight that markets play a key role, and describe models that leverage market specific data in serving personalised recommendations.},
keywords = {},
pubstate = {published},
tppubtype = {inbook}
}
Roitero, Kevin; Bozzato, Cristian; Mea, Vincenzo Della; Mizzaro, Stefano; Serra, Giuseppe
Twitter goes to the Doctor: Detecting Medical Tweets using Machine Learning and BERT. Proceedings Article
In: SIIRH@ ECIR, 2020.
@inproceedings{roitero2020twitter,
title = {Twitter goes to the Doctor: Detecting Medical Tweets using Machine Learning and {BERT}},
author = {Kevin Roitero and Cristian Bozzato and Vincenzo Della Mea and Stefano Mizzaro and Giuseppe Serra},
year = {2020},
date = {2020-01-01},
booktitle = {SIIRH@ECIR},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Amigó, Enrique; Gonzalo, Julio; Mizzaro, Stefano; Carrillo-de-Albornoz, Jorge
An Effectiveness Metric for Ordinal Classification: Formal Properties and Experimental Results Proceedings Article
In: Jurafsky, Dan; Chai, Joyce; Schluter, Natalie; Tetreault, Joel (Ed.): Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 3938–3949, Association for Computational Linguistics, Online, 2020.
@inproceedings{amigo-etal-2020-effectiveness,
title = {An Effectiveness Metric for Ordinal Classification: Formal Properties and Experimental Results},
author = {Enrique Amigó and Julio Gonzalo and Stefano Mizzaro and Jorge Carrillo-de-Albornoz},
editor = {Dan Jurafsky and Joyce Chai and Natalie Schluter and Joel Tetreault},
url = {https://aclanthology.org/2020.acl-main.363},
doi = {10.18653/v1/2020.acl-main.363},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
pages = {3938--3949},
publisher = {Association for Computational Linguistics},
address = {Online},
abstract = {In Ordinal Classification tasks, items have to be assigned to classes that have a relative ordering, such as ``positive'', ``neutral'', ``negative'' in sentiment analysis. Remarkably, the most popular evaluation metrics for ordinal classification tasks either ignore relevant information (for instance, precision/recall on each of the classes ignores their relative ordering) or assume additional information (for instance, Mean Average Error assumes absolute distances between classes). In this paper we propose a new metric for Ordinal Classification, Closeness Evaluation Measure, that is rooted on Measurement Theory and Information Theory. Our theoretical analysis and experimental results over both synthetic data and data from NLP shared tasks indicate that the proposed metric captures quality aspects from different traditional tasks simultaneously. In addition, it generalizes some popular classification (nominal scale) and error minimization (interval scale) metrics, depending on the measurement scale in which it is instantiated.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Amigó, Enrique; Mizzaro, Stefano
On the nature of information access evaluation metrics: a unifying framework Journal Article
In: Information Retrieval Journal, vol. 23, no 3, pp. 318–386, 2020, ISSN: 1573-7659.
@article{Amigo2020,
title = {On the nature of information access evaluation metrics: a unifying framework},
author = {Enrique Amigó and Stefano Mizzaro},
url = {https://doi.org/10.1007/s10791-020-09374-0},
doi = {10.1007/s10791-020-09374-0},
issn = {1573-7659},
year = {2020},
date = {2020-01-01},
journal = {Information Retrieval Journal},
volume = {23},
number = {3},
pages = {318--386},
abstract = {We provide a uniform, general, and complete formal account of evaluation metrics for ranking, classification, clustering, and other information access problems. We leverage concepts from measurement theory, such as scale types and permissible transformation functions, and we capture the nature of evaluation metrics in many tasks by two formal definitions, which lead to a distinction of two metric/tasks families, and provide a comprehensive classification of the tasks that have been proposed so far. We derive some theorems to analyze the suitability (or otherwise) of some common metrics. Within our model we can derive and explain the theoretical properties and drawbacks of the state of the art metrics for multiple tasks. The main contributions of this paper are that, differently from previous studies, the formalization is well grounded on a solid discipline, it is general as it can take into account most effectiveness metrics as well as most existing tasks, and it allows to derive important consequences on metrics and their limitations.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Amigó, Enrique; Fang, Hui; Mizzaro, Stefano; Zhai, Chengxiang
Axiomatic thinking for information retrieval: introduction to special issue Journal Article
In: Information Retrieval Journal, vol. 23, no 3, pp. 187–190, 2020, ISSN: 1573-7659.
@article{Amigo2020b,
title = {Axiomatic thinking for information retrieval: introduction to special issue},
author = {Enrique Amigó and Hui Fang and Stefano Mizzaro and Chengxiang Zhai},
url = {https://doi.org/10.1007/s10791-020-09376-y},
doi = {10.1007/s10791-020-09376-y},
issn = {1573-7659},
year = {2020},
date = {2020-01-01},
journal = {Information Retrieval Journal},
volume = {23},
number = {3},
pages = {187--190},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2019
Roitero, Kevin; Soprano, Michael; Mizzaro, Stefano
Bias and Fairness in Effectiveness Evaluation by Means of Network Analysis and Mixture Models Proceedings Article
In: CEUR Workshop Proceedings, pp. 2, CEUR-WS, 2019.
@inproceedings{bias-fairness-19,
title = {Bias and Fairness in Effectiveness Evaluation by Means of Network Analysis and Mixture Models},
author = {Kevin Roitero and Michael Soprano and Stefano Mizzaro},
year = {2019},
date = {2019-10-14},
booktitle = {CEUR Workshop Proceedings},
volume = {2441},
pages = {2},
publisher = {CEUR-WS},
internal-note = {Review: apparent duplicate of entry DBLP:conf/iir/RoiteroMS19 (same title and authors, same CEUR Vol-2441); consider merging},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Han, L; Roitero, K; Gadiraju, U; Sarasua, C; Checco, A; Maddalena, E; Demartini, G
The Impact of Task Abandonment in Crowdsourcing Journal Article
In: IEEE Transactions on Knowledge & Data Engineering, no 01, pp. 1-1, 2019, ISSN: 1558-2191.
@article{8873609,
title = {The Impact of Task Abandonment in Crowdsourcing},
author = {Han, L. and Roitero, K. and Gadiraju, U. and Sarasua, C. and Checco, A. and Maddalena, E. and Demartini, G.},
doi = {10.1109/TKDE.2019.2948168},
issn = {1558-2191},
year = {2019},
date = {2019-10-01},
journal = {IEEE Transactions on Knowledge \& Data Engineering},
number = {01},
pages = {1--1},
publisher = {IEEE Computer Society},
address = {Los Alamitos, CA, USA},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Roitero, Kevin; Culpepper, J. Shane; Sanderson, Mark; Scholer, Falk; Mizzaro, Stefano
Fewer topics? A million topics? Both?! On topics subsets in test collections Journal Article
In: Information Retrieval Journal, 2019, ISSN: 1573-7659.
@article{Roitero2019ffew,
title = {Fewer topics? A million topics? Both?! On topics subsets in test collections},
author = {Roitero, Kevin and Culpepper, J. Shane and Sanderson, Mark and Scholer, Falk and Mizzaro, Stefano},
url = {https://doi.org/10.1007/s10791-019-09357-w},
doi = {10.1007/s10791-019-09357-w},
issn = {1573-7659},
year = {2019},
date = {2019-05-08},
journal = {Information Retrieval Journal},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
}
Roitero, Kevin; Mizzaro, Stefano; Soprano, Michael
Bias and Fairness in Effectiveness Evaluation by Means of Network Analysis and Mixture Models Proceedings Article
In: Proceedings of the 10th Italian Information Retrieval Workshop, Padova, Italy, September 16-18, 2019., pp. 6–7, 2019.
@inproceedings{DBLP:conf/iir/RoiteroMS19,
  title     = {Bias and Fairness in Effectiveness Evaluation by Means of Network Analysis and Mixture Models},
  author    = {Kevin Roitero and Stefano Mizzaro and Michael Soprano},
  booktitle = {Proceedings of the 10th Italian Information Retrieval Workshop, Padova, Italy, September 16-18, 2019.},
  pages     = {6--7},
  url       = {http://ceur-ws.org/Vol-2441/paper4.pdf},
  year      = {2019},
  date      = {2019-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
}