ICCK Transactions on Emerging Topics in Artificial Intelligence
ISSN: 3068-6652 (Online)
Email: [email protected]


TY  - JOUR
AU  - Liu, Zhangqi
PY  - 2025
DA  - 2025/09/14
TI  - Reinforcement Learning for Prompt Optimization in Language Models: A Comprehensive Survey of Methods, Representations, and Evaluation Challenges
JO  - ICCK Transactions on Emerging Topics in Artificial Intelligence
T2  - ICCK Transactions on Emerging Topics in Artificial Intelligence
JF  - ICCK Transactions on Emerging Topics in Artificial Intelligence
VL  - 2
IS  - 4
SP  - 173
EP  - 181
DO  - 10.62762/TETAI.2025.790504
UR  - https://www.icck.org/article/abs/TETAI.2025.790504
KW  - prompt engineering
KW  - reinforcement learning
KW  - language models
KW  - prompt optimization
KW  - reward design
KW  - prompt representation
AB  - The growing prominence of prompt engineering as a means of controlling large language models has given rise to a diverse set of methods, ranging from handcrafted templates to embedding-level tuning. Yet, as prompts increasingly serve not merely as input scaffolds but as adaptive interfaces between users and models, the question of how to systematically optimize them remains unresolved. Reinforcement learning, with its capacity for sequential decision-making and reward-driven adaptation, has been proposed as a possible framework for discovering effective prompting strategies. This survey explores the emerging intersection of RL and prompt engineering, organizing existing research along three interdependent axes: the representation of prompts (symbolic, soft, and hybrid), the design of RL-based optimization mechanisms, and the challenges of evaluating and generalizing learned prompt policies. Rather than presenting a single unified framework, the discussion reflects the fragmented, often experimental nature of current approaches, many of which remain constrained by unstable reward signals, limited generalizability, and a lack of reproducible evaluation standards. By analyzing methodological innovations and points of friction alike, this work aims to foster a more critical and reflective understanding of what it means to "learn to prompt" in complex, real-world language modeling contexts.
SN  - 3068-6652
PB  - Institute of Central Computation and Knowledge
LA  - English
ER  - 
@article{Liu2025Reinforcem,
author = {Zhangqi Liu},
title = {Reinforcement Learning for Prompt Optimization in Language Models: A Comprehensive Survey of Methods, Representations, and Evaluation Challenges},
journal = {ICCK Transactions on Emerging Topics in Artificial Intelligence},
year = {2025},
volume = {2},
number = {4},
pages = {173--181},
doi = {10.62762/TETAI.2025.790504},
url = {https://www.icck.org/article/abs/TETAI.2025.790504},
abstract = {The growing prominence of prompt engineering as a means of controlling large language models has given rise to a diverse set of methods, ranging from handcrafted templates to embedding-level tuning. Yet, as prompts increasingly serve not merely as input scaffolds but as adaptive interfaces between users and models, the question of how to systematically optimize them remains unresolved. Reinforcement learning, with its capacity for sequential decision-making and reward-driven adaptation, has been proposed as a possible framework for discovering effective prompting strategies. This survey explores the emerging intersection of RL and prompt engineering, organizing existing research along three interdependent axes: the representation of prompts (symbolic, soft, and hybrid), the design of RL-based optimization mechanisms, and the challenges of evaluating and generalizing learned prompt policies. Rather than presenting a single unified framework, the discussion reflects the fragmented, often experimental nature of current approaches, many of which remain constrained by unstable reward signals, limited generalizability, and a lack of reproducible evaluation standards. By analyzing methodological innovations and points of friction alike, this work aims to foster a more critical and reflective understanding of what it means to "learn to prompt" in complex, real-world language modeling contexts.},
keywords = {prompt engineering, reinforcement learning, language models, prompt optimization, reward design, prompt representation},
issn = {3068-6652},
publisher = {Institute of Central Computation and Knowledge}
}
Copyright © 2025 by the Author(s). Published by Institute of Central Computation and Knowledge. This article is an open access article distributed under the terms and conditions of the Creative Commons Attribution (CC BY) license (https://creativecommons.org/licenses/by/4.0/), which permits use, sharing, adaptation, distribution and reproduction in any medium or format, as long as you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons license, and indicate if changes were made.
Portico
All published articles are preserved here permanently:
https://www.portico.org/publishers/icck/