@inproceedings{38479d452b5049b49c1df65ed193039d,
title = "Comparative analysis of topic modelling approaches on student feedback",
abstract = "Topic modelling, a type of clustering for textual data, is a popular method to extract themes from text. Methods such as Latent Dirichlet Allocation (LDA), Latent Semantic Analysis (LSA) and Non-negative Matrix Factorization (NMF) have been successfully used across a wide range of applications. Large Language Models, such as BERT, have led to significant improvements in machine learning tasks for textual data in general and topic modelling in particular. In this paper, we compare the performance of a BERT-based topic modelling approach with LDA, LSA and NMF on textual feedback from students about their mental health and remote learning experience during the COVID-19 pandemic. While all methods lead to coherent and distinct topics, the BERT-based approach and NMF are able to identify more fine-grained topics. Moreover, while NMF resulted in more detailed topics about the students' mental health-related experiences, the BERT-based approach produced more detailed topics about the students' experiences with remote learning.",
keywords = "Topic Modelling, BERT, LDA, LSA, NMF, educational apartheid",
author = "Hayat, {Faiz Iqbal} and Shatnawi, Safwan and Haig, Ella",
year = "2024",
month = nov,
day = "24",
doi = "10.5220/0012890400003838",
language = "English",
volume = "1",
publisher = "SciTePress",
pages = "226--233",
editor = "Frans Coenen and Ana Fred and Jorge Bernardino",
booktitle = "Proceedings of the 16th International Joint Conference on Knowledge Discovery, Knowledge Engineering and Knowledge Management",
note = "16th International Joint Conference on Knowledge Discovery, Knowledge Engineering and Knowledge Management; Conference date: 17-11-2024 through 19-11-2024",
}