LuffyTheFox committed on
Commit
b8ce18c
·
verified ·
1 Parent(s): 66fadad

Upload chat_template_thinking.jinja

Browse files
Files changed (1) hide show
  1. chat_template_thinking.jinja +422 -0
chat_template_thinking.jinja ADDED
@@ -0,0 +1,422 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {#- ===== HELPER: raise_exception macro =====
2
+ Plain Jinja2 doesn't have a built-in raise_exception, so the template defines its own.
3
+ This macro does not abort rendering; it writes an error marker ('\n[ERROR: <message>]') into the rendered output.
4
+ Callers should check the output for the "[ERROR: " prefix to detect validation failures.
5
+ -#}
6
+ {%- macro raise_exception(message) -%}
7
+ {{- '\n[ERROR: ' ~ message ~ ']' -}}
8
+ {%- endmacro -%}
9
+
10
+ {#- ===== SECTION 1A: MACRO render_content =====
11
+ Handles string, list (image/video/text items), or None/undefined.
12
+ count_vision=true: increments ns.image_count / ns.video_count.
13
+ is_system_content=false: Set true when rendering system/developer content
14
+ to enable media validation (emits an [ERROR: ...] marker via raise_exception; rendering continues).
15
+ add_vision_id (template variable): when defined and truthy, prefixes media with 'Picture N: ' / 'Video N: '.
16
+ -#}
17
+ {%- macro render_content(content, count_vision=false, is_system_content=false) -%}
18
+ {#- VALIDATION: System messages cannot contain images or videos (from v18); this pass only emits error markers, the items are still rendered by the main branch below -#}
19
+ {#- FIX: also exclude strings and handle None - llama.cpp treats strings as non-iterable in for loops -#}
20
+ {%- if is_system_content and content is iterable and content is not mapping and content is not string and content is not none -%}
21
+ {%- for item in content -%}
22
+ {%- if item.type == 'image' or 'image' in item or 'image_url' in item -%}
23
+ {{- raise_exception('System message cannot contain images.') -}}
24
+ {%- endif -%}
25
+ {%- if item.type == 'video' or 'video' in item -%}
26
+ {{- raise_exception('System message cannot contain videos.') -}}
27
+ {%- endif -%}
28
+ {%- endfor -%}
29
+ {%- endif -%}
30
+
31
+ {#- Main content rendering -#}
32
+ {#- Handle None/undefined content: renders as an empty string -#}
33
+ {%- if content is none or content is defined == false -%}
34
+ {{- '' -}}
35
+ {%- elif content is string -%}
36
+ {{- content -}}
37
+ {#- FIX: also exclude strings - llama.cpp treats strings as non-iterable in for loops -#}
38
+ {%- elif content is iterable and content is not mapping and content is not string -%}
39
+ {%- for item in content -%}
40
+ {#- Handle different item types: image, then video, then text; first match wins -#}
41
+ {%- if item.type == 'image' or 'image' in item or 'image_url' in item -%}
42
+ {%- if count_vision -%}{%- set ns.image_count = ns.image_count + 1 -%}{%- endif -%}
43
+ {%- if add_vision_id is defined and add_vision_id -%}
44
+ {{- 'Picture ' ~ ns.image_count ~ ': ' -}}
45
+ {%- endif -%}
46
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' -}}
47
+ {%- elif item.type == 'video' or 'video' in item -%}
48
+ {%- if count_vision -%}{%- set ns.video_count = ns.video_count + 1 -%}{%- endif -%}
49
+ {%- if add_vision_id is defined and add_vision_id -%}
50
+ {{- 'Video ' ~ ns.video_count ~ ': ' -}}
51
+ {%- endif -%}
52
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' -}}
53
+ {%- elif item.type == 'text' or 'text' in item -%}
54
+ {{- item.text -}}
55
+ {#- ERROR: Unknown item type - emit an explicit error marker (from v18) -#}
56
+ {%- else -%}
57
+ {{- raise_exception('Unexpected content type in message content.') -}}
58
+ {%- endif -%}
59
+ {%- endfor -%}
60
+ {#- ERROR: Unknown content type (e.g. a mapping) - emit an explicit error marker (from v18) -#}
61
+ {%- elif content is not none and content is defined -%}
62
+ {{- raise_exception('Unexpected content type.') -}}
63
+ {%- endif -%}
64
+ {%- endmacro -%}
65
+
66
+ {#- ===== SECTION 1B: MACRO detect_tool_error (NEW in v0.7) =====
67
+ Detects if a tool response contains error indicators.
68
+ Uses heuristics from v18:
69
+ - Checks (case-insensitively) for: "error":, error:, exception:, traceback, command not found, invalid syntax, failed to, permission denied
70
+ - Ignores responses with '$ ' (shell output prefix) or 'took ' (timing info)
71
+ - Ignores responses of 500 chars or more (likely valid output, not error)
72
+
73
+ Produces no output; sets ns.last_tool_failed (true/false). Non-string content is scanned as '' and is never flagged.
74
+ Side effect: increments ns.consecutive_failures on an error, resets it to 0 otherwise.
75
+ -#}
76
+ {%- macro detect_tool_error(content) -%}
77
+ {#- Type guard: ensure content is string (llama.cpp compatibility) -#}
78
+ {%- set content = content if content is string else '' -%}
79
+ {%- set content_lower = content | lower -%}
80
+ {%- set content_length = content | length -%}
81
+
82
+ {#- Error detection heuristics: short response + no shell prefix + no timing info + has error keywords -#}
83
+ {%- if content_length < 500
84
+ and '$ ' not in content
85
+ and 'took ' not in content_lower
86
+ and ('"error":' in content_lower or 'error:' in content_lower
87
+ or 'exception:' in content_lower or 'traceback' in content_lower
88
+ or 'command not found' in content_lower or 'invalid syntax' in content_lower
89
+ or 'failed to' in content_lower or 'permission denied' in content_lower) -%}
90
+ {#- Error detected - update failure tracking -#}
91
+ {%- set ns.last_tool_failed = true -%}
92
+ {%- set ns.consecutive_failures = ns.consecutive_failures + 1 -%}
93
+ {%- else -%}
94
+ {#- No error - reset failure tracking -#}
95
+ {%- set ns.last_tool_failed = false -%}
96
+ {%- set ns.consecutive_failures = 0 -%}
97
+ {%- endif -%}
98
+ {%- endmacro -%}
99
+
100
+ {#- ===== SECTION 2: NAMESPACE INITIALISATION =====
101
+ One namespace object, ns, carries every piece of mutable template state:
102
+
103
+ enable_thinking: starts true; selects the think prefill used in the generation prompt
104
+ preserve_thinking: starts true; keeps think blocks when rendering conversation history
105
+ image_count: images counted so far (render_content with count_vision=true)
106
+ video_count: videos counted so far (render_content with count_vision=true)
107
+ consecutive_failures: tool errors seen in a row (v0.7, from v18)
108
+ last_tool_failed: whether the latest tool response was flagged as an error (v0.7, from v18)
109
+ -#}
110
+ {%- set ns = namespace(enable_thinking=true, preserve_thinking=true, image_count=0, video_count=0, consecutive_failures=0, last_tool_failed=false) -%}
111
+
112
+ {#- enable_thinking kwarg: when supplied, its truthiness overrides the default -#}
113
+ {%- if enable_thinking is defined -%}
114
+ {%- set ns.enable_thinking = true if enable_thinking else false -%}
115
+ {%- endif -%}
116
+
117
+ {#- preserve_thinking kwarg (v0.7: also applies to conversation history, not just the generation prompt).
118
+ Falsy  => think blocks are not preserved and thinking is switched off as well.
119
+ Truthy => think blocks are preserved; enable_thinking is left untouched.
120
+ Absent => the defaults above stay in effect.
121
+ -#}
122
+ {%- if preserve_thinking is defined -%}
123
+ {%- set ns.preserve_thinking = true if preserve_thinking else false -%}
124
+ {%- if not ns.preserve_thinking -%}
125
+ {%- set ns.enable_thinking = false -%}
126
+ {%- endif -%}
127
+ {%- endif -%}
143
+
144
+ {#- ===== SECTION 3: PRE-SCAN =====
145
+ Walks all messages once to resolve the thinking switch before anything is rendered.
146
+ User messages: string content ending (after right-stripping) in /no_think or /think.
147
+ System/developer messages: string content containing <|think_off|> or <|think_on|>
148
+ (lets apps steer thinking mode through the system prompt). Non-string content is scanned as ''.
149
+ Later flags overwrite earlier ones, so the last one seen wins.
150
+ -#}
151
+ {%- for _m in messages -%}
152
+ {%- set _text = _m.content if _m.content is string else '' -%}
153
+ {%- if _m.role == 'user' -%}
154
+ {%- set _tail = _text.rstrip() -%}
155
+ {%- if _tail.endswith('/no_think') -%}
156
+ {%- set ns.enable_thinking = false -%}
157
+ {%- elif _tail.endswith('/think') -%}
158
+ {%- set ns.enable_thinking = true -%}
159
+ {%- endif -%}
160
+ {%- elif _m.role in ['system', 'developer'] -%}
161
+ {%- if '<|think_off|>' in _text -%}
162
+ {%- set ns.enable_thinking = false -%}
163
+ {%- elif '<|think_on|>' in _text -%}
164
+ {%- set ns.enable_thinking = true -%}
165
+ {%- endif -%}
166
+ {%- endif -%}
167
+ {%- endfor -%}
168
+
169
+ {#- ===== SECTION 4: VALIDATE MESSAGES (NEW in v0.7) =====
170
+ Validate that messages is provided and not empty.
171
+ From v18: emits an [ERROR: ...] marker (via the raise_exception macro) if no messages provided; rendering is not aborted.
172
+ -#}
173
+ {%- if not messages -%}
174
+ {{- raise_exception('No messages provided.') -}}
175
+ {%- endif -%}
176
+
177
+ {#- ===== SECTION 5: COLLECT SYSTEM CONTENT =====
178
+ Gathers every system/developer message into ns_sys.content, joined by \n\n.
179
+ Each message is rendered with is_system_content=true so that render_content
180
+ runs its media validation on it (v0.7).
181
+ <|think_off|> / <|think_on|> markers are removed from the text, and messages
182
+ that are empty after trimming are skipped.
183
+ -#}
184
+ {%- set ns_sys = namespace(content='') -%}
185
+ {%- for msg in messages -%}
186
+ {%- if msg.role in ['system', 'developer'] -%}
187
+ {#- Render, trim, drop the control markers, trim again -#}
188
+ {%- set _part = render_content(msg.content | default(''), false, true) | trim | replace('<|think_off|>', '') | replace('<|think_on|>', '') | trim -%}
189
+ {#- The first non-empty part is stored as-is; later ones get a blank-line separator -#}
190
+ {%- if _part and ns_sys.content == '' -%}
191
+ {%- set ns_sys.content = _part -%}
192
+ {%- elif _part -%}
193
+ {%- set ns_sys.content = ns_sys.content + '\n\n' + _part -%}
194
+ {%- endif -%}
195
+ {%- endif -%}
196
+ {%- endfor -%}
199
+
200
+ {#- ===== SECTION 6: BUILD TOOLS LIST =====
201
+ Collects the tools into ns_tb.list in {"type":"function","function":{...}} form:
202
+ entries that already have a "function" attribute are kept, bare specs are wrapped.
203
+ JSON serialisation is deferred to output time (avoids Markup + str escaping bugs).
204
+ -#}
205
+ {%- set _has_tools = tools is defined and tools -%}
206
+ {%- if _has_tools -%}
207
+ {%- set ns_tb = namespace(list=[]) -%}
208
+ {%- for _spec in tools -%}
209
+ {%- set _entry = _spec if _spec.function is defined else {"type": "function", "function": _spec} -%}
210
+ {%- set ns_tb.list = ns_tb.list + [_entry] -%}
211
+ {%- endfor -%}
212
+ {%- endif -%}
215
+
216
+ {#- ===== SECTION 7: OUTPUT SYSTEM TURN =====
217
+ Emitted only when there is system text and/or a tools list; no default system prompt is injected.
218
+ Order inside the turn: collected system text first, then the tools block.
219
+ Every fragment goes through its own {{ }} output so tojson Markup results are
220
+ never concatenated with plain strings (which would trigger HTML-escaping).
221
+ -#}
222
+ {%- if ns_sys.content or _has_tools -%}
223
+ {{- '<|im_start|>system\n' -}}
224
+ {#- An empty ns_sys.content prints nothing, so it needs no guard of its own -#}
225
+ {{- ns_sys.content -}}
226
+ {%- if ns_sys.content and _has_tools -%}{{- '\n\n' -}}{%- endif -%}
227
+ {%- if _has_tools -%}
228
+ {{- '# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n' -}}
229
+ {%- for tool in ns_tb.list -%}
230
+ {#- The newline goes between entries, never after the last one -#}
231
+ {%- if not loop.first -%}{{- '\n' -}}{%- endif -%}
232
+ {{- tool | tojson -}}
233
+ {%- endfor -%}
234
+ {{- '\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{"name": <function-name>, "arguments": <args-json-object>}\n</tool_call>' -}}
235
+ {%- endif -%}
236
+ {{- '<|im_end|>\n' -}}
237
+ {%- endif -%}
238
+
239
+ {#- ===== SECTION 8: MAIN MESSAGE LOOP =====
240
+ FIXED in v0.7:
241
+ - Tool responses now have error detection via detect_tool_error macro
242
+ - Warning messages injected for failed tool calls
243
+ - consecutive_failures tracking for escalating warnings
244
+ -#}
245
+ {%- for message in messages -%}
246
+
247
+ {#- 8a: System / Developer — already rendered above, skip -#}
248
+ {%- if message.role == 'system' or message.role == 'developer' -%}
249
+
250
+ {#- 8b: User messages -#}
251
+ {%- elif message.role == 'user' -%}
252
+ {%- set _uc = render_content(message.content | default(''), true, false) -%}
253
+ {{- '<|im_start|>user\n' + _uc + '<|im_end|>\n' -}}
254
+
255
+ {#- 8c: Assistant messages -#}
256
+ {%- elif message.role == 'assistant' -%}
257
+ {#- Safely extract content as string — guard against absent key.
258
+ Also support message.reasoning_content as an explicit think-block source
259
+ (used by some frameworks that store thinking separately from content). -#}
260
+ {%- if message.content is defined and message.content is string -%}
261
+ {%- set _ac = message.content -%}
262
+ {#- FIX: also exclude strings - llama.cpp treats strings as non-iterable in for loops -#}
263
+ {%- elif message.content is defined and message.content is iterable and message.content is not mapping and message.content is not string -%}
264
+ {%- set _ac = render_content(message.content, false, false) -%}
265
+ {%- else -%}
266
+ {%- set _ac = '' -%}
267
+ {%- endif -%}
268
+
269
+ {#- Reconstruct content from reasoning_content + content when the framework
270
+ stores thinking separately (e.g. OpenAI-style reasoning_content field).
271
+ Only apply when no think-block already present in _ac. -#}
272
+ {%- if message.reasoning_content is defined and message.reasoning_content is string
273
+ and message.reasoning_content | trim
274
+ and '<think>' not in _ac -%}
275
+ {%- set _ac = '<think>\n' + message.reasoning_content | trim + '\n</think>\n\n' + _ac -%}
276
+ {%- endif -%}
277
+
278
+ {#- Collect tool_calls if present -#}
279
+ {#- Type check: ensure tool_calls is a list, not string (llama.cpp compatibility) -#}
280
+ {%- set _tc = message.tool_calls if message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls is not string else [] -%}
281
+
282
+ {#- Strip <tool_call> prefix from content when tool_calls also present
283
+ (some frameworks duplicate the data in both fields) -#}
284
+ {%- if _tc and '<tool_call>' in _ac -%}
285
+ {%- set _ac = _ac.split('<tool_call>')[0] | trim -%}
286
+ {%- endif -%}
287
+
288
+ {#- FIXED in v0.7: Think-block handling with preserve_thinking support
289
+
290
+ New logic (from v18): preserve_thinking controls think-block display on ALL
291
+ assistant messages, not just generation prompt:
292
+
293
+ - Tool-call turns : never strip (think block is part of the tool-call format)
294
+ - preserve_thinking : if true, show think blocks on ALL messages
295
+ - Last-history turn : if preserve_thinking false, apply last-turn handling
296
+ - Historical turns : if preserve_thinking false, strip think blocks
297
+
298
+ The old behavior (strip unless add_generation_prompt) is now controlled
299
+ by preserve_thinking parameter.
300
+ -#}
301
+ {%- set _show_think = false -%}
302
+ {%- if _tc -%}
303
+ {#- Tool calls: always show think block -#}
304
+ {%- set _show_think = true -%}
305
+ {%- elif ns.preserve_thinking -%}
306
+ {#- preserve_thinking=true: show think blocks on all messages -#}
307
+ {%- set _show_think = true -%}
308
+ {%- elif loop.last -%}
309
+ {#- Last message without preserve_thinking: show if thinking enabled -#}
310
+ {%- set _show_think = ns.enable_thinking -%}
311
+ {%- endif -%}
312
+
313
+ {#- Apply think-block stripping based on _show_think flag -#}
314
+ {%- if not _show_think -%}
315
+ {#- Fuzzy end-tag detection for stripping -#}
316
+ {%- set _think_end = '' -%}
317
+ {%- if '</think>' in _ac -%}
318
+ {%- set _think_end = '</think>' -%}
319
+ {%- elif '</thinking>' in _ac -%}
320
+ {%- set _think_end = '</thinking>' -%}
321
+ {%- elif '</ think>' in _ac -%}
322
+ {%- set _think_end = '</ think>' -%}
323
+ {%- elif '</think >' in _ac -%}
324
+ {%- set _think_end = '</think >' -%}
325
+ {%- endif -%}
326
+ {%- if _think_end -%}
327
+ {%- set _ac = _ac.split(_think_end)[-1].lstrip('\n') -%}
328
+ {%- endif -%}
329
+ {%- elif not _tc and loop.last and '<think>' not in _ac and not ns.enable_thinking -%}
330
+ {#- Last turn, non-thinking: inject empty think block if missing -#}
331
+ {%- set _ac = '<think>\n\n</think>\n\n' + _ac -%}
332
+ {%- endif -%}
333
+
334
+ {#- Emit the assistant turn -#}
335
+ {{- '<|im_start|>assistant\n' -}}
336
+ {%- if _ac -%}
337
+ {{- _ac -}}
338
+ {%- if _tc -%}{{- '\n' -}}{%- endif -%}
339
+ {%- endif -%}
340
+
341
+ {#- Render tool calls in Hermes format. Accepts OpenAI-style entries ({"function": {...}}) and flat entries ({"name", "arguments"}).
342
+ Non-blank string arguments are emitted verbatim (assumed to already be JSON); blank, none or missing arguments become {}.
343
+ Each tojson value is output via its own {{ }} block, never concatenated with plain strings, which would trigger Markup HTML-escaping. -#}
344
+ {%- if _tc -%}
345
+ {%- for tc in _tc -%}{%- set _fn = tc.function if tc.function is defined else tc -%}
346
+ {{- '<tool_call>\n' -}}
347
+ {{- '{"name": ' -}}{{- _fn.name | tojson -}}
348
+ {%- if _fn.arguments is string and _fn.arguments | trim -%}
349
+ {{- ', "arguments": ' + _fn.arguments -}}
350
+ {%- elif _fn.arguments is defined and _fn.arguments is not none and _fn.arguments is not string -%}{{- ', "arguments": ' -}}{{- _fn.arguments | tojson -}}
351
+ {%- else -%}{{- ', "arguments": {}' -}}
352
+ {%- endif -%}
353
+ {{- '}' -}}
354
+ {%- if not loop.last -%}
355
+ {{- '\n</tool_call>\n' -}}
356
+ {%- else -%}
357
+ {{- '\n</tool_call>' -}}
358
+ {%- endif -%}
359
+ {%- endfor -%}
360
+ {%- endif -%}
361
+ {{- '<|im_end|>\n' -}}
362
+
363
+ {#- 8d: Tool results — with error detection (NEW in v0.7) -#}
364
+ {%- elif message.role == 'tool' -%}
365
+ {%- set _prev_role = messages[loop.index0 - 1].role if loop.index0 > 0 else '' -%}
366
+ {%- set _next_role = messages[loop.index0 + 1].role if not loop.last else '' -%}
367
+
368
+ {#- Normalise tool content (list => render_content, none/undefined => '', anything else as-is), then run v0.7 error detection on it -#}
369
+ {%- set _tool_content = render_content(message.content, true, false) if (message.content is defined and message.content is iterable and message.content is not mapping and message.content is not string) else ('' if message.content is not defined or message.content is none else message.content) -%}
370
+ {{- detect_tool_error(_tool_content) -}}
371
+
372
+ {%- if _prev_role != 'tool' -%}
373
+ {{- '<|im_start|>user\n' -}}
374
+ {%- endif -%}
375
+ {{- '<tool_response>\n' -}}
376
+ {{- _tool_content -}}
377
+
378
+ {#- NEW in v0.7: Inject warning if tool error detected -#}
379
+ {#- v0.8: Replaced emoji with text-only for tokenization safety -#}
380
+ {%- if ns.last_tool_failed -%}
381
+ {%- if ns.consecutive_failures >= 2 -%}
382
+ {{- '\n\n[SYSTEM WARNING: ' ~ ns.consecutive_failures ~ ' consecutive tool errors detected. Your previous approach is incorrect.]' -}}
383
+ {%- else -%}
384
+ {{- '\n\n[SYSTEM WARNING: The previous tool call returned an error. Diagnose the failure and retry with corrected arguments.]' -}}
385
+ {%- endif -%}
386
+ {%- endif -%}
387
+
388
+ {%- if _next_role == 'tool' -%}
389
+ {{- '\n</tool_response>\n' -}}
390
+ {%- else -%}
391
+ {{- '\n</tool_response>' -}}
392
+ {{- '<|im_end|>\n' -}}
393
+ {%- endif -%}
394
+
395
+ {#- 8e: Unknown role - explicit error (from v18) -#}
396
+ {%- else -%}
397
+ {#- '~' stringifies a missing or non-string role; '+' would fail before the marker is emitted -#}{{- raise_exception('Unexpected message role: ' ~ message.role) -}}
398
+ {%- endif -%}
399
+
400
+ {%- endfor -%}
401
+
402
+ {#- ===== SECTION 9: GENERATION PROMPT =====
403
+ FIXED in v0.7: preserve_thinking now affects conversation history (Section 8),
404
+ so generation prompt logic is simplified.
405
+
406
+ enable_thinking=True → open <think>\n prefill so llama.cpp reasoning-budget
407
+ and other inference engines can hook into the think-stream.
408
+ The model continues generating inside the open block.
409
+ enable_thinking=False → closed, empty think block as the non-thinking prefill: <think>\n\n</think>\n\n
410
+
411
+ NOTE: The <think>\n opener is EPHEMERAL — it lives only in the generation
412
+ prompt, never in chat history. Historical think-block stripping is handled
413
+ in Section 8 based on preserve_thinking setting.
414
+ -#}
415
+ {%- if add_generation_prompt -%}
416
+ {{- '<|im_start|>assistant\n' -}}
417
+ {%- if ns.enable_thinking -%}
418
+ {{- '<think>\n' -}}
419
+ {%- else -%}
420
+ {{- '<think>\n\n</think>\n\n' -}}
421
+ {%- endif -%}
422
+ {%- endif -%}