@article{10.3844/jcssp.2026.552.565,
  article_type = {journal},
  title        = {Optimized {WFST}-Based {ASR} for {Arabic}: Balancing Memory, Speed, and Accuracy},
  author       = {Sultan, Wael A. and Semary, Mourad S. and Abdou, Sherif M.},
  journal      = {Journal of Computer Science},
  publisher    = {Science Publications},
  volume       = {22},
  number       = {2},
  year         = {2026},
  month        = feb,
  pages        = {552--565},
  doi          = {10.3844/jcssp.2026.552.565},
  url          = {https://thescipub.com/abstract/jcssp.2026.552.565},
  abstract     = {Weighted Finite-State Transducers (WFSTs) have revolutionized Automatic Speech Recognition (ASR) by enabling significantly faster decoding speeds compared to traditional systems that build the search space progressively. However, applying WFSTs to morphology-rich languages such as Arabic presents challenges due to the large vocabulary, resulting in extensive networks that exceed the memory capacity of standard CPUs. This study introduces various strategies to reduce the size of large vocabulary Arabic WFSTs with minimal impact on accuracy. We employed a star architecture for the network topology, which effectively reduced the network size and improved the decoding speed. Additionally, a two-pass decoding approach was adopted: the first pass used a smaller network with a short history language model, and the second pass rescored the produced lattice with a longer history language model. We explored several tuning parameters to find the optimal balance between network size and accuracy. Our results show that by using an optimized search graph built with a 2-gram language model instead of a 3-gram model, we achieve a 45\% reduction in the graph's memory footprint with a negligible accuracy loss of less than 0.2\% MR-WER. On the MGB3 benchmark, our method achieved 40x real-time Arabic ASR data processing with an accuracy of 83.67\%, compared to the 85.82\% accuracy of state-of-the-art systems, which only achieve 8x real-time performance on standard CPUs.},
}