My research is in natural language processing, the subfield of computer science that aims to enable computers to understand and produce human language. I focus mainly on language translation, and am interested in syntactic parsing and other areas as well.
Teaching
Recent and selected publications
Andy Yang, Pascal Bergsträßer, Georg Zetzsche, David Chiang, and Anthony W. Lin.
Length generalization bounds for transformers.
In
Proc. ICML. 2026.
To appear.
PDF
BibTeX
@inproceedings{yang-etal-2026-length,
  author    = {Yang, Andy and Bergstr{\"a}{\ss}er, Pascal and Zetzsche, Georg and Chiang, David and Lin, Anthony W.},
  title     = {Length Generalization Bounds for Transformers},
  booktitle = {Proc. ICML},
  year      = {2026},
  note      = {To appear},
  url       = {https://arxiv.org/abs/2603.02238}
}
Chihiro Taguchi, Yukinori Takubo, and David Chiang.
Automatic speech recognition for documenting endangered languages: case study of Ikema Miyakoan.
In
Proc. Language Resources and Evaluation Conference. 2026.
To appear.
PDF
BibTeX
@inproceedings{taguchi-etal-2026-automatic,
  author    = {Taguchi, Chihiro and Takubo, Yukinori and Chiang, David},
  title     = {Automatic Speech Recognition for Documenting Endangered Languages: Case Study of {Ikema} {Miyakoan}},
  booktitle = {Proc. Language Resources and Evaluation Conference},
  year      = {2026},
  note      = {To appear},
  url       = {https://arxiv.org/abs/2603.26248}
}
Stephen Bothwell, Kaitlin Stephan, Hildegund Müller, and David Chiang.
From paginā to webpage: on developing and documenting a digitized Latin collection.
Journal of Open Humanities Data, 2026.
doi:10.5334/johd.397.
DOI
BibTeX
@article{bothwell-etal-2026-pagina,
  author  = {Bothwell, Stephen and Stephan, Kaitlin and M{\"u}ller, Hildegund and Chiang, David},
  title   = {From Pagin{\=a} to Webpage: On Developing and Documenting a Digitized {Latin} Collection},
  journal = {Journal of Open Humanities Data},
  year    = {2026},
  volume  = {11},
  article = {61},
  doi     = {10.5334/johd.397}
}
Akriti Dhasmana, Aarohi Srivastava, and David Chiang.
Dialect matters: cross-lingual ASR transfer for low-resource Indic language varieties.
In
Proc. Workshop on NLP for Similar Languages, Varieties and Dialects. 2026.
PDF
BibTeX
@inproceedings{dhasmana-etal-2026-dialect,
  author    = {Dhasmana, Akriti and Srivastava, Aarohi and Chiang, David},
  title     = {Dialect Matters: Cross-Lingual {ASR} Transfer for Low-Resource {Indic} Language Varieties},
  booktitle = {Proc. Workshop on NLP for Similar Languages, Varieties and Dialects},
  year      = {2026},
  url       = {https://aclanthology.org/2026.vardial-1.12/}
}
Andy Yang, Anej Svete, Jiaoda Li, Anthony Widjaja Lin, Jonathan Rawski, Ryan Cotterell, and David Chiang.
Probability distributions computed by autoregressive transformers.
In
Proc. ICLR. 2026.
To appear.
PDF
BibTeX
@inproceedings{yang-etal-2026-probability,
  author    = {Yang, Andy and Svete, Anej and Li, Jiaoda and Lin, Anthony Widjaja and Rawski, Jonathan and Cotterell, Ryan and Chiang, David},
  title     = {Probability Distributions Computed by Autoregressive Transformers},
  booktitle = {Proc. ICLR},
  year      = {2026},
  note      = {To appear},
  url       = {https://openreview.net/forum?id=gZIcyx1tQY}
}
Andy Yang, Christopher Watson, Anton Xue, Satwik Bhattamishra, Jose Llarena, William Merrill, Emile Dos Santos Ferreira, Anej Svete, and David Chiang.
The transformer cookbook.
Transactions on Machine Learning Research, January 2026.
PDF
BibTeX
@article{yang-etal-2025-cookbook,
  author  = {Yang, Andy and Watson, Christopher and Xue, Anton and Bhattamishra, Satwik and Llarena, Jose and Merrill, William and Dos Santos Ferreira, Emile and Svete, Anej and Chiang, David},
  title   = {The Transformer Cookbook},
  journal = {Transactions on Machine Learning Research},
  year    = {2026},
  month   = jan,
  url     = {https://openreview.net/forum?id=sPshCSvDrX}
}
Katsumi Ibaraki and David Chiang.
Frustratingly easy data augmentation for low-resource ASR.
2025.
arXiv:2509.15373.
PDF
BibTeX
@misc{ibaraki-chiang-2025-frustratingly,
  author = {Ibaraki, Katsumi and Chiang, David},
  title  = {Frustratingly Easy Data Augmentation for Low-Resource {ASR}},
  year   = {2025},
  note   = {{arXiv}:2509.15373},
  url    = {https://arxiv.org/abs/2509.15373}
}
Chihiro Taguchi, Seng Mai, Keita Kurabe, Yusuke Sakai, Georgina Agyei, Soudabeh Eslami, and David Chiang.
Languages still left behind: toward a better multilingual machine translation benchmark.
In
Proc. EMNLP, 20142–20154. 2025.
doi:10.18653/v1/2025.emnlp-main.1018.
PDF
BibTeX
@inproceedings{taguchi-etal-2025-languages,
  author    = {Taguchi, Chihiro and Mai, Seng and Kurabe, Keita and Sakai, Yusuke and Agyei, Georgina and Eslami, Soudabeh and Chiang, David},
  title     = {Languages Still Left Behind: Toward a Better Multilingual Machine Translation Benchmark},
  booktitle = {Proc. EMNLP},
  pages     = {20142--20154},
  year      = {2025},
  doi       = {10.18653/v1/2025.emnlp-main.1018},
  url       = {https://aclanthology.org/2025.emnlp-main.1018/}
}
Andy Yang, Michaël Cadilhac, and David Chiang.
Knee-deep in C-RASP: a transformer depth hierarchy.
In
Proc. NeurIPS 38. 2025.
To appear.
PDF
BibTeX
@inproceedings{yang+:2025,
  author    = {Yang, Andy and Cadilhac, Micha{\"e}l and Chiang, David},
  title     = {Knee-Deep in {C-RASP}: A Transformer Depth Hierarchy},
  booktitle = {Proc. NeurIPS 38},
  year      = {2025},
  note      = {To appear},
  url       = {https://arxiv.org/abs/2506.16055}
}
Andy Yang, Lena Strobl, David Chiang, and Dana Angluin.
Simulating hard attention using soft attention.
Transactions of the Association for Computational Linguistics, 14:147–166, 2026.
doi:10.1162/TACL.a.597.
DOI
BibTeX
@article{yang-etal-2025-softmax,
  author  = {Yang, Andy and Strobl, Lena and Chiang, David and Angluin, Dana},
  title   = {Simulating Hard Attention Using Soft Attention},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {14},
  pages   = {147--166},
  year    = {2026},
  doi     = {10.1162/TACL.a.597}
}
Aarohi Srivastava and David Chiang.
We're calling an intervention: exploring fundamental hurdles in adapting language models to nonstandard text.
In
Proc. Workshop on Noisy and User-Generated Text. 2025.
Best Paper Award.
PDF
BibTeX
@inproceedings{srivastava-chiang-2025,
  author    = {Srivastava, Aarohi and Chiang, David},
  title     = {We're Calling an Intervention: Exploring Fundamental Hurdles in Adapting Language Models to Nonstandard Text},
  booktitle = {Proc. Workshop on Noisy and User-Generated Text},
  year      = {2025},
  url       = {https://arxiv.org/abs/2404.07304}
}
David Chiang.
Transformers in uniform TC\(^0\).
Transactions on Machine Learning Research, January 2025.
PDF
BibTeX
@article{chiang:2025,
  author  = {Chiang, David},
  title   = {Transformers in Uniform {TC$^0$}},
  journal = {Transactions on Machine Learning Research},
  year    = {2025},
  month   = jan,
  url     = {https://openreview.net/forum?id=ZA7D4nQuQF}
}
Lena Strobl, Dana Angluin, David Chiang, Jonathan Rawski, and Ashish Sabharwal.
Transformers as transducers.
Transactions of the Association for Computational Linguistics, 13:200–219, 2025.
doi:10.1162/tacl_a_00736.
DOI
BibTeX
@article{strobl-etal-2025-transducers,
  author  = {Strobl, Lena and Angluin, Dana and Chiang, David and Rawski, Jonathan and Sabharwal, Ashish},
  title   = {Transformers as Transducers},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {13},
  pages   = {200--219},
  year    = {2025},
  doi     = {10.1162/tacl_a_00736}
}
Chihiro Taguchi and David Chiang.
Language complexity and speech recognition accuracy: orthographic complexity hurts, phonological complexity doesn't.
In
Proc. ACL. 2024.
Outstanding Paper Award and Senior Area Chair Award.
PDF
BibTeX
@inproceedings{taguchi-chiang-2024-complexity,
  author    = {Taguchi, Chihiro and Chiang, David},
  title     = {Language Complexity and Speech Recognition Accuracy: Orthographic Complexity Hurts, Phonological Complexity Doesn't},
  booktitle = {Proc. ACL},
  year      = {2024},
  url       = {https://aclanthology.org/2024.acl-long.827/}
}
Fahim Faisal, Orevaoghene Ahia, Aarohi Srivastava, Kabir Ahuja, David Chiang, Yulia Tsvetkov, and Antonios Anastasopoulos.
DIALECTBENCH: a NLP benchmark for dialects, varieties, and closely-related languages.
In
Proc. ACL. 2024.
Social Impact Award.
PDF
BibTeX
@inproceedings{faisal+:2024,
  author    = {Faisal, Fahim and Ahia, Orevaoghene and Srivastava, Aarohi and Ahuja, Kabir and Chiang, David and Tsvetkov, Yulia and Anastasopoulos, Antonios},
  title     = {{DIALECTBENCH}: A {NLP} Benchmark for Dialects, Varieties, and Closely-Related Languages},
  booktitle = {Proc. ACL},
  year      = {2024},
  url       = {https://aclanthology.org/2024.acl-long.777/}
}
David Chiang, Colin McDonald, and Chung-chieh Shan.
Exact recursive probabilistic programming.
PACMPL, 2023.
doi:10.1145/3586050.
PDF
BibTeX
@article{chiang+mcdonald+shan:2023,
  author  = {Chiang, David and McDonald, Colin and Shan, Chung{-}chieh},
  title   = {Exact Recursive Probabilistic Programming},
  journal = {PACMPL},
  volume  = {7},
  number  = {OOPSLA1},
  article = {98},
  xmonth  = {April},
  year    = {2023},
  doi     = {10.1145/3586050},
  url     = {https://dl.acm.org/doi/10.1145/3586050}
}
full list