<?xml version="1.0"?>
<dblpperson name="Jiaoda Li" pid="299/1900" n="18">
<person key="homepages/299/1900" mdate="2021-08-11">
<author pid="299/1900">Jiaoda Li</author>
</person>
<r><article publtype="informal" key="journals/corr/abs-2605-00768" mdate="2026-06-04">
<author pid="299/1900">Jiaoda Li</author>
<author pid="146/4361">Ryan Cotterell</author>
<title>Characterizing the Expressivity of Local Attention in Transformers.</title>
<year>2026</year>
<month>May</month>
<volume>abs/2605.00768</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2605.00768</ee>
<url>db/journals/corr/corr2605.html#abs-2605-00768</url>
<stream>streams/journals/corr</stream>
</article>
</r>
<r><inproceedings key="conf/acl/JeradSLC25" mdate="2025-09-25">
<author pid="402/1063">Selim Jerad</author>
<author pid="259/1164">Anej Svete</author>
<author pid="299/1900">Jiaoda Li</author>
<author pid="146/4361">Ryan Cotterell</author>
<title>Unique Hard Attention: A Tale of Two Sides.</title>
<pages>977-996</pages>
<year>2025</year>
<booktitle>ACL (2)</booktitle>
<ee type="oa">https://doi.org/10.18653/v1/2025.acl-short.76</ee>
<crossref>conf/acl/2025-2</crossref>
<url>db/conf/acl/acl2025-2.html#JeradSLC25</url>
</inproceedings>
</r>
<r><inproceedings key="conf/nips/LiC25" mdate="2026-06-15">
<author pid="299/1900">Jiaoda Li</author>
<author pid="146/4361">Ryan Cotterell</author>
<title>Characterizing the Expressivity of Fixed-Precision Transformer Language Models.</title>
<year>2025</year>
<booktitle>NeurIPS</booktitle>
<ee type="oa">http://papers.nips.cc/paper_files/paper/2025/hash/e9e250537b0345111d50a5f8f392cffc-Abstract-Conference.html</ee>
<crossref>conf/nips/2025</crossref>
<url>db/conf/nips/neurips2025.html#LiC25</url>
</inproceedings>
</r>
<r><article publtype="informal" key="journals/corr/abs-2503-14615" mdate="2025-04-14">
<author pid="402/1063">Selim Jerad</author>
<author pid="259/1164">Anej Svete</author>
<author pid="299/1900">Jiaoda Li</author>
<author pid="146/4361">Ryan Cotterell</author>
<title>Unique Hard Attention: A Tale of Two Sides.</title>
<year>2025</year>
<month>March</month>
<volume>abs/2503.14615</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2503.14615</ee>
<url>db/journals/corr/corr2503.html#abs-2503-14615</url>
<stream>streams/journals/corr</stream>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2505-23623" mdate="2025-06-29">
<author pid="299/1900">Jiaoda Li</author>
<author pid="146/4361">Ryan Cotterell</author>
<title>Characterizing the Expressivity of Transformer Language Models.</title>
<year>2025</year>
<month>May</month>
<volume>abs/2505.23623</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2505.23623</ee>
<url>db/journals/corr/corr2505.html#abs-2505-23623</url>
<stream>streams/journals/corr</stream>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2510-27118" mdate="2025-11-17">
<author pid="92/3951">Andy Yang</author>
<author pid="259/1164">Anej Svete</author>
<author pid="299/1900">Jiaoda Li</author>
<author pid="38/2655">Anthony Widjaja Lin</author>
<author pid="243/3078">Jonathan Rawski</author>
<author pid="146/4361">Ryan Cotterell</author>
<author pid="89/233-1">David Chiang 0001</author>
<title>Probability Distributions Computed by Hard-Attention Transformers.</title>
<year>2025</year>
<month>October</month>
<volume>abs/2510.27118</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2510.27118</ee>
<url>db/journals/corr/corr2510.html#abs-2510-27118</url>
<stream>streams/journals/corr</stream>
</article>
</r>
<r><inproceedings key="conf/acl/LiHSC24" mdate="2024-09-24">
<author pid="299/1900">Jiaoda Li</author>
<author pid="80/7559">Yifan Hou</author>
<author pid="86/10440">Mrinmaya Sachan</author>
<author pid="146/4361">Ryan Cotterell</author>
<title>What Do Language Models Learn in Context? The Structured Task Hypothesis.</title>
<pages>12365-12379</pages>
<year>2024</year>
<booktitle>ACL (1)</booktitle>
<ee type="oa">https://doi.org/10.18653/v1/2024.acl-long.669</ee>
<ee type="oa">https://aclanthology.org/2024.acl-long.669</ee>
<crossref>conf/acl/2024-1</crossref>
<url>db/conf/acl/acl2024-1.html#LiHSC24</url>
</inproceedings>
</r>
<r><inproceedings key="conf/naacl/LiWSC24" mdate="2024-09-11">
<author pid="299/1900">Jiaoda Li</author>
<author pid="293/7051">Jennifer C. White</author>
<author pid="86/10440">Mrinmaya Sachan</author>
<author pid="146/4361">Ryan Cotterell</author>
<title>A Transformer with Stack Attention.</title>
<pages>4318-4335</pages>
<year>2024</year>
<booktitle>NAACL-HLT (Findings)</booktitle>
<ee type="oa">https://doi.org/10.18653/v1/2024.findings-naacl.269</ee>
<crossref>conf/naacl/2024f</crossref>
<url>db/conf/naacl/naacl2024f.html#LiWSC24</url>
<stream>streams/conf/naacl</stream>
</inproceedings>
</r>
<r><article publtype="informal" key="journals/corr/abs-2405-04515" mdate="2024-06-13">
<author pid="299/1900">Jiaoda Li</author>
<author pid="293/7051">Jennifer C. White</author>
<author pid="86/10440">Mrinmaya Sachan</author>
<author pid="146/4361">Ryan Cotterell</author>
<title>A Transformer with Stack Attention.</title>
<year>2024</year>
<volume>abs/2405.04515</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2405.04515</ee>
<url>db/journals/corr/corr2405.html#abs-2405-04515</url>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2406-04216" mdate="2025-04-01">
<author pid="299/1900">Jiaoda Li</author>
<author orcid="0000-0002-3197-4460" pid="80/7559">Yifan Hou</author>
<author pid="86/10440">Mrinmaya Sachan</author>
<author pid="146/4361">Ryan Cotterell</author>
<title>What Do Language Models Learn in Context? The Structured Task Hypothesis.</title>
<year>2024</year>
<volume>abs/2406.04216</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2406.04216</ee>
<url>db/journals/corr/corr2406.html#abs-2406-04216</url>
</article>
</r>
<r><inproceedings key="conf/emnlp/HouLFSZZBS23" mdate="2024-05-07">
<author pid="80/7559">Yifan Hou</author>
<author pid="299/1900">Jiaoda Li</author>
<author pid="181/2843">Yu Fei</author>
<author pid="329/3838">Alessandro Stolfo</author>
<author pid="245/8640">Wangchunshu Zhou</author>
<author pid="264/9714">Guangtao Zeng</author>
<author orcid="0000-0001-8968-9649" pid="184/3742">Antoine Bosselut</author>
<author pid="86/10440">Mrinmaya Sachan</author>
<title>Towards a Mechanistic Interpretation of Multi-Step Reasoning Capabilities of Language Models.</title>
<pages>4902-4919</pages>
<year>2023</year>
<booktitle>EMNLP</booktitle>
<ee type="oa">https://doi.org/10.18653/v1/2023.emnlp-main.299</ee>
<ee type="oa">https://aclanthology.org/2023.emnlp-main.299</ee>
<crossref>conf/emnlp/2023</crossref>
<url>db/conf/emnlp/emnlp2023.html#HouLFSZZBS23</url>
</inproceedings>
</r>
<r><article publtype="informal" key="journals/corr/abs-2310-14491" mdate="2025-04-01">
<author orcid="0000-0002-3197-4460" pid="80/7559">Yifan Hou</author>
<author pid="299/1900">Jiaoda Li</author>
<author pid="181/2843">Yu Fei</author>
<author pid="329/3838">Alessandro Stolfo</author>
<author pid="245/8640">Wangchunshu Zhou</author>
<author pid="264/9714">Guangtao Zeng</author>
<author pid="184/3742">Antoine Bosselut</author>
<author pid="86/10440">Mrinmaya Sachan</author>
<title>Towards a Mechanistic Interpretation of Multi-Step Reasoning Capabilities of Language Models.</title>
<year>2023</year>
<volume>abs/2310.14491</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2310.14491</ee>
<url>db/journals/corr/corr2310.html#abs-2310-14491</url>
</article>
</r>
<r><inproceedings key="conf/naacl/LiCS22" mdate="2022-08-01">
<author pid="299/1900">Jiaoda Li</author>
<author pid="146/4361">Ryan Cotterell</author>
<author pid="86/10440">Mrinmaya Sachan</author>
<title>Probing via Prompting.</title>
<pages>1144-1157</pages>
<year>2022</year>
<booktitle>NAACL-HLT</booktitle>
<ee type="oa">https://doi.org/10.18653/v1/2022.naacl-main.84</ee>
<ee type="oa">https://aclanthology.org/2022.naacl-main.84</ee>
<crossref>conf/naacl/2022</crossref>
<url>db/conf/naacl/naacl2022.html#LiCS22</url>
</inproceedings>
</r>
<r><article publtype="informal" key="journals/corr/abs-2207-01736" mdate="2022-07-06">
<author pid="299/1900">Jiaoda Li</author>
<author pid="146/4361">Ryan Cotterell</author>
<author pid="86/10440">Mrinmaya Sachan</author>
<title>Probing via Prompting.</title>
<year>2022</year>
<volume>abs/2207.01736</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2207.01736</ee>
<url>db/journals/corr/corr2207.html#abs-2207-01736</url>
</article>
</r>
<r><article key="journals/tacl/LiCS21" mdate="2024-06-19">
<author pid="299/1900">Jiaoda Li</author>
<author pid="146/4361">Ryan Cotterell</author>
<author pid="86/10440">Mrinmaya Sachan</author>
<title>Differentiable Subset Pruning of Transformer Heads.</title>
<pages>1442-1459</pages>
<year>2021</year>
<volume>9</volume>
<journal>Trans. Assoc. Comput. Linguistics</journal>
<ee type="oa">https://doi.org/10.1162/tacl_a_00436</ee>
<url>db/journals/tacl/tacl9.html#LiCS21</url>
</article>
</r>
<r><inproceedings key="conf/emnlp/LiAS21" mdate="2022-06-23">
<author pid="299/1900">Jiaoda Li</author>
<author pid="186/7228">Duygu Ataman</author>
<author orcid="0000-0002-1438-4741" pid="00/8341">Rico Sennrich</author>
<title>Vision Matters When It Should: Sanity Checking Multimodal Machine Translation Models.</title>
<pages>8556-8562</pages>
<year>2021</year>
<booktitle>EMNLP (1)</booktitle>
<ee type="oa">https://doi.org/10.18653/v1/2021.emnlp-main.673</ee>
<ee type="oa">https://aclanthology.org/2021.emnlp-main.673</ee>
<crossref>conf/emnlp/2021-1</crossref>
<url>db/conf/emnlp/emnlp2021-1.html#LiAS21</url>
</inproceedings>
</r>
<r><article publtype="informal" key="journals/corr/abs-2108-04657" mdate="2021-08-11">
<author pid="299/1900">Jiaoda Li</author>
<author pid="146/4361">Ryan Cotterell</author>
<author pid="86/10440">Mrinmaya Sachan</author>
<title>Differentiable Subset Pruning of Transformer Heads.</title>
<year>2021</year>
<volume>abs/2108.04657</volume>
<journal>CoRR</journal>
<ee type="oa">https://arxiv.org/abs/2108.04657</ee>
<url>db/journals/corr/corr2108.html#abs-2108-04657</url>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2109-03415" mdate="2021-09-20">
<author pid="299/1900">Jiaoda Li</author>
<author pid="186/7228">Duygu Ataman</author>
<author pid="00/8341">Rico Sennrich</author>
<title>Vision Matters When It Should: Sanity Checking Multimodal Machine Translation Models.</title>
<year>2021</year>
<volume>abs/2109.03415</volume>
<journal>CoRR</journal>
<ee type="oa">https://arxiv.org/abs/2109.03415</ee>
<url>db/journals/corr/corr2109.html#abs-2109-03415</url>
</article>
</r>
<coauthors n="17" nc="1">
<co c="0"><na f="a/Ataman:Duygu" pid="186/7228">Duygu Ataman</na></co>
<co c="0"><na f="b/Bosselut:Antoine" pid="184/3742">Antoine Bosselut</na></co>
<co c="0"><na f="c/Chiang_0001:David" pid="89/233-1">David Chiang 0001</na></co>
<co c="0"><na f="c/Cotterell:Ryan" pid="146/4361">Ryan Cotterell</na></co>
<co c="0"><na f="f/Fei:Yu" pid="181/2843">Yu Fei</na></co>
<co c="0"><na f="h/Hou:Yifan" pid="80/7559">Yifan Hou</na></co>
<co c="0"><na f="j/Jerad:Selim" pid="402/1063">Selim Jerad</na></co>
<co c="0"><na f="l/Lin:Anthony_Widjaja" pid="38/2655">Anthony Widjaja Lin</na></co>
<co c="0"><na f="r/Rawski:Jonathan" pid="243/3078">Jonathan Rawski</na></co>
<co c="0"><na f="s/Sachan:Mrinmaya" pid="86/10440">Mrinmaya Sachan</na></co>
<co c="0"><na f="s/Sennrich:Rico" pid="00/8341">Rico Sennrich</na></co>
<co c="0"><na f="s/Stolfo:Alessandro" pid="329/3838">Alessandro Stolfo</na></co>
<co c="0"><na f="s/Svete:Anej" pid="259/1164">Anej Svete</na></co>
<co c="0"><na f="w/White:Jennifer_C=" pid="293/7051">Jennifer C. White</na></co>
<co c="0"><na f="y/Yang:Andy" pid="92/3951">Andy Yang</na></co>
<co c="0"><na f="z/Zeng:Guangtao" pid="264/9714">Guangtao Zeng</na></co>
<co c="0"><na f="z/Zhou:Wangchunshu" pid="245/8640">Wangchunshu Zhou</na></co>
</coauthors>
</dblpperson>

