Text Generation
Transformers
Safetensors
granite_switch
language
granite-switch
granite-4.1
conversational
Instructions to use ibm-granite/granite-switch-4.1-8b-preview with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ibm-granite/granite-switch-4.1-8b-preview with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="ibm-granite/granite-switch-4.1-8b-preview")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("ibm-granite/granite-switch-4.1-8b-preview", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use ibm-granite/granite-switch-4.1-8b-preview with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "ibm-granite/granite-switch-4.1-8b-preview"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ibm-granite/granite-switch-4.1-8b-preview",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker
docker model run hf.co/ibm-granite/granite-switch-4.1-8b-preview
- SGLang
How to use ibm-granite/granite-switch-4.1-8b-preview with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "ibm-granite/granite-switch-4.1-8b-preview" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ibm-granite/granite-switch-4.1-8b-preview",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "ibm-granite/granite-switch-4.1-8b-preview" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ibm-granite/granite-switch-4.1-8b-preview",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
- Docker Model Runner
How to use ibm-granite/granite-switch-4.1-8b-preview with Docker Model Runner:
docker model run hf.co/ibm-granite/granite-switch-4.1-8b-preview
Updating GraniteSwitch 4.1 8b
Browse files
- BUILD.md +16 -5
- chat_template.jinja +19 -8
- compose_report.json +1 -1
- config.json +13 -71
- io_configs/factuality-correction/io.yaml +8 -2
- io_configs/factuality-detection/io.yaml +8 -2
- io_configs/guardian-core/io.yaml +6 -1
- io_configs/policy-guardrails/io.yaml +13 -1
- io_configs/requirement-check/io.yaml +3 -1
- io_configs/uncertainty/io.yaml +2 -2
- model-00001-of-00004.safetensors +2 -2
- model-00002-of-00004.safetensors +2 -2
- model.safetensors.index.json +3 -4
BUILD.md
CHANGED
|
@@ -35,10 +35,21 @@ Total adapters: **12**
|
|
| 35 |
- composed_param_count: 9,568,112,640
|
| 36 |
- Param delta: +14.17%
|
| 37 |
- compose_settings:
|
| 38 |
-
-
|
| 39 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
- target_model: "granite-4.1-8b"
|
| 41 |
- adapter_sources:
|
| 42 |
-
- "ibm-granite/granitelib-rag-r1.0": "
|
| 43 |
-
- "ibm-granite/granitelib-guardian-r1.0": "
|
| 44 |
-
- "ibm-granite/granitelib-core-r1.0": "
|
|
|
|
| 35 |
- composed_param_count: 9,568,112,640
|
| 36 |
- Param delta: +14.17%
|
| 37 |
- compose_settings:
|
| 38 |
+
- adapter_substitute_token_ids:
|
| 39 |
+
- 100264
|
| 40 |
+
- 100264
|
| 41 |
+
- 100264
|
| 42 |
+
- 100264
|
| 43 |
+
- 100264
|
| 44 |
+
- 27
|
| 45 |
+
- 27
|
| 46 |
+
- 27
|
| 47 |
+
- 27
|
| 48 |
+
- 27
|
| 49 |
+
- 27
|
| 50 |
+
- 100264
|
| 51 |
- target_model: "granite-4.1-8b"
|
| 52 |
- adapter_sources:
|
| 53 |
+
- "ibm-granite/granitelib-rag-r1.0": "2f0b2c79c6731068625aca8045c2eb2e8912b353"
|
| 54 |
+
- "ibm-granite/granitelib-guardian-r1.0": "773b254e98f993a605ec4b6259634906e0e64e8e"
|
| 55 |
+
- "ibm-granite/granitelib-core-r1.0": "d0a2a96a4cd07e96f0fe7ca29a42bfe088299d43"
|
chat_template.jinja
CHANGED
|
@@ -38,7 +38,8 @@
|
|
| 38 |
adapter_token=adapter_token,
|
| 39 |
adapter_type=adapter_type,
|
| 40 |
adapter_invocation_text=adapter_invocation_text,
|
| 41 |
-
alora_target_idx=-1
|
|
|
|
| 42 |
) %}
|
| 43 |
{%- if tools %}
|
| 44 |
{%- for tool in tools %}
|
|
@@ -59,6 +60,7 @@
|
|
| 59 |
{#- For lora adapters: insert activation token at the very beginning -#}
|
| 60 |
{%- if ns.adapter_token and ns.adapter_type == 'lora' %}
|
| 61 |
{{- ns.adapter_token }}
|
|
|
|
| 62 |
{%- endif %}
|
| 63 |
|
| 64 |
{%- if messages[0].role == 'system' %}
|
|
@@ -91,7 +93,8 @@
|
|
| 91 |
{%- endif %}
|
| 92 |
{%- endif %}
|
| 93 |
{%- if ns.system_message %}
|
| 94 |
-
{
|
|
|
|
| 95 |
{%- endif %}
|
| 96 |
{#- ALoRA Pass 1: find the last user message containing the invocation text.
|
| 97 |
ns.alora_target_idx stays -1 when the invocation sequence is the assistant role
|
|
@@ -129,17 +132,22 @@
|
|
| 129 |
{%- endfor %}
|
| 130 |
{%- endif %}
|
| 131 |
{%- endif %}
|
| 132 |
-
{#- ALoRA Pass 2: inject activation token
|
|
|
|
| 133 |
{%- if loop.index0 == ns.alora_target_idx %}
|
| 134 |
{%- set _parts = content.val.rsplit(ns.adapter_invocation_text, 1) %}
|
| 135 |
{%- if _parts | length > 1 %}
|
| 136 |
-
{%- set content.val = _parts[0] + ns.adapter_token + ns.adapter_invocation_text + _parts[1] %}
|
| 137 |
{%- endif %}
|
| 138 |
{%- endif %}
|
| 139 |
{%- if (message.role == 'user') or (message.role == 'system' and not loop.first) %}
|
| 140 |
-
{
|
|
|
|
|
|
|
| 141 |
{%- elif message.role == 'assistant' %}
|
| 142 |
-
{{-
|
|
|
|
|
|
|
| 143 |
{%- if message.tool_calls %}
|
| 144 |
{%- for tool_call in message.tool_calls %}
|
| 145 |
{%- if (loop.first and content.val) or (not loop.first) %}
|
|
@@ -162,7 +170,8 @@
|
|
| 162 |
{{- '<|end_of_text|>\n' }}
|
| 163 |
{%- elif message.role == 'tool' %}
|
| 164 |
{%- if loop.first or (messages[loop.index0 - 1].role != 'tool') %}
|
| 165 |
-
{{- '<|start_of_role|>
|
|
|
|
| 166 |
{%- endif %}
|
| 167 |
{{- '\n<tool_response>\n' }}
|
| 168 |
{{- content.val }}
|
|
@@ -178,7 +187,9 @@
|
|
| 178 |
role token boundary rather than inside a user message. -#}
|
| 179 |
{%- if ns.adapter_token and ns.adapter_type == 'alora' and ns.alora_target_idx == -1 %}
|
| 180 |
{{- ns.adapter_token }}
|
|
|
|
| 181 |
{%- endif %}
|
| 182 |
{%- if add_generation_prompt %}
|
| 183 |
-
{{- '<|start_of_role|>
|
|
|
|
| 184 |
{%- endif %}
|
|
|
|
| 38 |
adapter_token=adapter_token,
|
| 39 |
adapter_type=adapter_type,
|
| 40 |
adapter_invocation_text=adapter_invocation_text,
|
| 41 |
+
alora_target_idx=-1,
|
| 42 |
+
skip_next_start_of_role=false
|
| 43 |
) %}
|
| 44 |
{%- if tools %}
|
| 45 |
{%- for tool in tools %}
|
|
|
|
| 60 |
{#- For lora adapters: insert activation token at the very beginning -#}
|
| 61 |
{%- if ns.adapter_token and ns.adapter_type == 'lora' %}
|
| 62 |
{{- ns.adapter_token }}
|
| 63 |
+
{%- set ns.skip_next_start_of_role = true %}
|
| 64 |
{%- endif %}
|
| 65 |
|
| 66 |
{%- if messages[0].role == 'system' %}
|
|
|
|
| 93 |
{%- endif %}
|
| 94 |
{%- endif %}
|
| 95 |
{%- if ns.system_message %}
|
| 96 |
+
{%- if ns.skip_next_start_of_role %}{%- set ns.skip_next_start_of_role = false %}{%- else %}{{- '<|start_of_role|>' }}{%- endif %}
|
| 97 |
+
{{- 'system<|end_of_role|>' + ns.system_message + '<|end_of_text|>\n' }}
|
| 98 |
{%- endif %}
|
| 99 |
{#- ALoRA Pass 1: find the last user message containing the invocation text.
|
| 100 |
ns.alora_target_idx stays -1 when the invocation sequence is the assistant role
|
|
|
|
| 132 |
{%- endfor %}
|
| 133 |
{%- endif %}
|
| 134 |
{%- endif %}
|
| 135 |
+
{#- ALoRA Pass 2: inject activation token AND drop the first char of
|
| 136 |
+
the invocation text so the runtime-swapped embedding doesn't duplicate. -#}
|
| 137 |
{%- if loop.index0 == ns.alora_target_idx %}
|
| 138 |
{%- set _parts = content.val.rsplit(ns.adapter_invocation_text, 1) %}
|
| 139 |
{%- if _parts | length > 1 %}
|
| 140 |
+
{%- set content.val = _parts[0] + ns.adapter_token + ns.adapter_invocation_text[1:] + _parts[1] %}
|
| 141 |
{%- endif %}
|
| 142 |
{%- endif %}
|
| 143 |
{%- if (message.role == 'user') or (message.role == 'system' and not loop.first) %}
|
| 144 |
+
{%- if ns.skip_next_start_of_role %}{%- set ns.skip_next_start_of_role = false %}{%- else %}{%- if ns.skip_next_start_of_role %}{%- set ns.skip_next_start_of_role = false %}{%- else %}{{- '<|start_of_role|>' }}{%- endif %}
|
| 145 |
+
{{- '' }}{%- endif %}
|
| 146 |
+
{{- message.role + '<|end_of_role|>' + content.val + '<|end_of_text|>\n' }}
|
| 147 |
{%- elif message.role == 'assistant' %}
|
| 148 |
+
{%- if ns.skip_next_start_of_role %}{%- set ns.skip_next_start_of_role = false %}{%- else %}{%- if ns.skip_next_start_of_role %}{%- set ns.skip_next_start_of_role = false %}{%- else %}{{- '<|start_of_role|>' }}{%- endif %}
|
| 149 |
+
{{- '' }}{%- endif %}
|
| 150 |
+
{{- message.role + '<|end_of_role|>' + content.val }}
|
| 151 |
{%- if message.tool_calls %}
|
| 152 |
{%- for tool_call in message.tool_calls %}
|
| 153 |
{%- if (loop.first and content.val) or (not loop.first) %}
|
|
|
|
| 170 |
{{- '<|end_of_text|>\n' }}
|
| 171 |
{%- elif message.role == 'tool' %}
|
| 172 |
{%- if loop.first or (messages[loop.index0 - 1].role != 'tool') %}
|
| 173 |
+
{%- if ns.skip_next_start_of_role %}{%- set ns.skip_next_start_of_role = false %}{%- else %}{{- '<|start_of_role|>' }}{%- endif %}
|
| 174 |
+
{{- 'user<|end_of_role|>' }}
|
| 175 |
{%- endif %}
|
| 176 |
{{- '\n<tool_response>\n' }}
|
| 177 |
{{- content.val }}
|
|
|
|
| 187 |
role token boundary rather than inside a user message. -#}
|
| 188 |
{%- if ns.adapter_token and ns.adapter_type == 'alora' and ns.alora_target_idx == -1 %}
|
| 189 |
{{- ns.adapter_token }}
|
| 190 |
+
{%- set ns.skip_next_start_of_role = true %}
|
| 191 |
{%- endif %}
|
| 192 |
{%- if add_generation_prompt %}
|
| 193 |
+
{%- if ns.skip_next_start_of_role %}{%- set ns.skip_next_start_of_role = false %}{%- else %}{{- '<|start_of_role|>' }}{%- endif %}
|
| 194 |
+
{{- 'assistant<|end_of_role|>' }}
|
| 195 |
{%- endif %}
|
compose_report.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
-
"timestamp": "2026-05-
|
| 4 |
},
|
| 5 |
"base_model_mapping": [
|
| 6 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
+
"timestamp": "2026-05-21T05:14:40.268526"
|
| 4 |
},
|
| 5 |
"base_model_mapping": [
|
| 6 |
{
|
config.json
CHANGED
|
@@ -27,19 +27,19 @@
|
|
| 27 |
16,
|
| 28 |
16
|
| 29 |
],
|
| 30 |
-
"
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
],
|
| 44 |
"adapter_token_ids": [
|
| 45 |
100352,
|
|
@@ -62,7 +62,6 @@
|
|
| 62 |
"attention_dropout": 0.0,
|
| 63 |
"attention_multiplier": 0.0078125,
|
| 64 |
"bos_token_id": 100257,
|
| 65 |
-
"control_dims": 32,
|
| 66 |
"control_token_gain": 15.0,
|
| 67 |
"dtype": "bfloat16",
|
| 68 |
"embedding_multiplier": 12.0,
|
|
@@ -70,63 +69,6 @@
|
|
| 70 |
"fused_add_norm": false,
|
| 71 |
"hidden_act": "silu",
|
| 72 |
"hidden_size": 4096,
|
| 73 |
-
"hiding_groups": {
|
| 74 |
-
"all_controls": [
|
| 75 |
-
"citations",
|
| 76 |
-
"query_rewrite",
|
| 77 |
-
"query_clarification",
|
| 78 |
-
"hallucination_detection",
|
| 79 |
-
"answerability",
|
| 80 |
-
"factuality-detection",
|
| 81 |
-
"policy-guardrails",
|
| 82 |
-
"factuality-correction",
|
| 83 |
-
"guardian-core",
|
| 84 |
-
"uncertainty",
|
| 85 |
-
"requirement-check",
|
| 86 |
-
"context-attribution"
|
| 87 |
-
]
|
| 88 |
-
},
|
| 89 |
-
"hiding_policy": {
|
| 90 |
-
"answerability": [
|
| 91 |
-
"all_controls"
|
| 92 |
-
],
|
| 93 |
-
"base": [
|
| 94 |
-
"all_controls"
|
| 95 |
-
],
|
| 96 |
-
"citations": [
|
| 97 |
-
"all_controls"
|
| 98 |
-
],
|
| 99 |
-
"context-attribution": [
|
| 100 |
-
"all_controls"
|
| 101 |
-
],
|
| 102 |
-
"factuality-correction": [
|
| 103 |
-
"all_controls"
|
| 104 |
-
],
|
| 105 |
-
"factuality-detection": [
|
| 106 |
-
"all_controls"
|
| 107 |
-
],
|
| 108 |
-
"guardian-core": [
|
| 109 |
-
"all_controls"
|
| 110 |
-
],
|
| 111 |
-
"hallucination_detection": [
|
| 112 |
-
"all_controls"
|
| 113 |
-
],
|
| 114 |
-
"policy-guardrails": [
|
| 115 |
-
"all_controls"
|
| 116 |
-
],
|
| 117 |
-
"query_clarification": [
|
| 118 |
-
"all_controls"
|
| 119 |
-
],
|
| 120 |
-
"query_rewrite": [
|
| 121 |
-
"all_controls"
|
| 122 |
-
],
|
| 123 |
-
"requirement-check": [
|
| 124 |
-
"all_controls"
|
| 125 |
-
],
|
| 126 |
-
"uncertainty": [
|
| 127 |
-
"all_controls"
|
| 128 |
-
]
|
| 129 |
-
},
|
| 130 |
"initializer_range": 0.1,
|
| 131 |
"intermediate_size": 12800,
|
| 132 |
"layer_types": [
|
|
|
|
| 27 |
16,
|
| 28 |
16
|
| 29 |
],
|
| 30 |
+
"adapter_substitute_token_ids": [
|
| 31 |
+
100264,
|
| 32 |
+
100264,
|
| 33 |
+
100264,
|
| 34 |
+
100264,
|
| 35 |
+
100264,
|
| 36 |
+
27,
|
| 37 |
+
27,
|
| 38 |
+
27,
|
| 39 |
+
27,
|
| 40 |
+
27,
|
| 41 |
+
27,
|
| 42 |
+
100264
|
| 43 |
],
|
| 44 |
"adapter_token_ids": [
|
| 45 |
100352,
|
|
|
|
| 62 |
"attention_dropout": 0.0,
|
| 63 |
"attention_multiplier": 0.0078125,
|
| 64 |
"bos_token_id": 100257,
|
|
|
|
| 65 |
"control_token_gain": 15.0,
|
| 66 |
"dtype": "bfloat16",
|
| 67 |
"embedding_multiplier": 12.0,
|
|
|
|
| 69 |
"fused_add_norm": false,
|
| 70 |
"hidden_act": "silu",
|
| 71 |
"hidden_size": 4096,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
"initializer_range": 0.1,
|
| 73 |
"intermediate_size": 12800,
|
| 74 |
"layer_types": [
|
io_configs/factuality-correction/io.yaml
CHANGED
|
@@ -14,10 +14,16 @@ response_format: |
|
|
| 14 |
"required": ["correction"]
|
| 15 |
}
|
| 16 |
transformations: ~
|
| 17 |
-
instruction:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
parameters:
|
| 19 |
# corrected response can several hundred tokens at high temperatures
|
| 20 |
max_completion_tokens: 4096
|
| 21 |
temperature: 0.0
|
| 22 |
# No sentence boundary detection
|
| 23 |
-
sentence_boundaries: ~
|
|
|
|
| 14 |
"required": ["correction"]
|
| 15 |
}
|
| 16 |
transformations: ~
|
| 17 |
+
instruction: |2
|
| 18 |
+
|
| 19 |
+
<guardian>As a judge agent, your role is to help assess whether the provided text meets the given judging criteria, utilizing all available information, including conversations, documents, and tools.
|
| 20 |
+
|
| 21 |
+
### Criteria: A factually incorrect response occurs when the assistant's message contains one or more factual claims that are unsupported by, inconsistent with, or directly contradicted by the information provided in the documents or context. This includes situations where the assistant: introduces details not grounded in the context, misstates or distorts facts contained within the context, misinterprets the meaning or implications of the context, supplies erroneous or conflicting information relative to the context. Even if only a small portion of the response contains such inaccuracies, the overall message is considered factually incorrect.
|
| 22 |
+
|
| 23 |
+
### Scoring Schema: If the last assistant's text meets the criteria, return a corrected version of the assistant's message based on the given context; otherwise, return 'none'.
|
| 24 |
parameters:
|
| 25 |
# corrected response can several hundred tokens at high temperatures
|
| 26 |
max_completion_tokens: 4096
|
| 27 |
temperature: 0.0
|
| 28 |
# No sentence boundary detection
|
| 29 |
+
sentence_boundaries: ~
|
io_configs/factuality-detection/io.yaml
CHANGED
|
@@ -16,9 +16,15 @@ response_format: |
|
|
| 16 |
"additionalProperties": false
|
| 17 |
}
|
| 18 |
transformations: ~
|
| 19 |
-
instruction:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
parameters:
|
| 21 |
max_completion_tokens: 20
|
| 22 |
temperature: 0.0
|
| 23 |
# No sentence boundary detection
|
| 24 |
-
sentence_boundaries: ~
|
|
|
|
| 16 |
"additionalProperties": false
|
| 17 |
}
|
| 18 |
transformations: ~
|
| 19 |
+
instruction: |2
|
| 20 |
+
|
| 21 |
+
<guardian>As a judge agent, your role is to help assess whether the provided text meets the given judging criteria, utilizing all available information, including conversations, documents, and tools.
|
| 22 |
+
|
| 23 |
+
### Criteria: A factually incorrect response occurs when the assistant's message contains one or more factual claims that are unsupported by, inconsistent with, or directly contradicted by the information provided in the documents or context. This includes situations where the assistant: introduces details not grounded in the context, misstates or distorts facts contained within the context, misinterprets the meaning or implications of the context, supplies erroneous or conflicting information relative to the context. Even if only a small portion of the response contains such inaccuracies, the overall message is considered factually incorrect.
|
| 24 |
+
|
| 25 |
+
### Scoring Schema: If the last assistant's text meets the criteria, return 'yes'; otherwise, return 'no'.
|
| 26 |
parameters:
|
| 27 |
max_completion_tokens: 20
|
| 28 |
temperature: 0.0
|
| 29 |
# No sentence boundary detection
|
| 30 |
+
sentence_boundaries: ~
|
io_configs/guardian-core/io.yaml
CHANGED
|
@@ -25,7 +25,12 @@ transformations:
|
|
| 25 |
- type: nest
|
| 26 |
input_path: []
|
| 27 |
field_name: "guardian"
|
| 28 |
-
instruction:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
parameters:
|
| 30 |
max_completion_tokens: 15
|
| 31 |
temperature: 0.0
|
|
|
|
| 25 |
- type: nest
|
| 26 |
input_path: []
|
| 27 |
field_name: "guardian"
|
| 28 |
+
instruction: |-
|
| 29 |
+
<guardian>As a judge agent, your role is to help assess whether the provided text meets the given judging criteria, utilizing all available information, including conversations, documents, and tools.
|
| 30 |
+
|
| 31 |
+
### Criteria: {criteria}
|
| 32 |
+
|
| 33 |
+
### Scoring Schema: {scoring_schema}
|
| 34 |
parameters:
|
| 35 |
max_completion_tokens: 15
|
| 36 |
temperature: 0.0
|
io_configs/policy-guardrails/io.yaml
CHANGED
|
@@ -16,7 +16,19 @@ response_format: |
|
|
| 16 |
"additionalProperties": false
|
| 17 |
}
|
| 18 |
transformations: ~
|
| 19 |
-
instruction:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
parameters:
|
| 21 |
temperature: 0.0
|
| 22 |
max_completion_tokens: 20
|
|
|
|
| 16 |
"additionalProperties": false
|
| 17 |
}
|
| 18 |
transformations: ~
|
| 19 |
+
instruction: |-
|
| 20 |
+
<guardian> You are a compliance agent trying to help determine whether a scenario is compliant with a given policy.
|
| 21 |
+
|
| 22 |
+
### Criteria: Policy: {policy_text}
|
| 23 |
+
|
| 24 |
+
### Scoring Schema: Does the scenario described in the previous message comply with the policy? Compliance is determined based on the following guidelines:
|
| 25 |
+
<start_of_guidelines>:
|
| 26 |
+
- "Yes" if, given the scenario and the policy, it can be decided with certainty that the scenario complies with the policy
|
| 27 |
+
- "No" if, given the scenario and the policy, it can be decided with certainty that the scenario does not comply with the policy
|
| 28 |
+
- "Ambiguous" if, given the scenario and the policy, it is not possible to decide whether the scenario complies with, or violates, the policy and more information is needed to decide with certainty.
|
| 29 |
+
<end_of_guidelines>
|
| 30 |
+
|
| 31 |
+
Your answer must be either "Yes", "No", or "Ambiguous". You MUST return your answer as a valid JSON object with the key "label". For example, if your answer is "Yes", respond as "{{"label":"Yes"}}".
|
| 32 |
parameters:
|
| 33 |
temperature: 0.0
|
| 34 |
max_completion_tokens: 20
|
io_configs/requirement-check/io.yaml
CHANGED
|
@@ -25,7 +25,9 @@ transformations:
|
|
| 25 |
- type: nest
|
| 26 |
input_path: []
|
| 27 |
field_name: "requirement_check"
|
| 28 |
-
instruction:
|
|
|
|
|
|
|
| 29 |
parameters:
|
| 30 |
max_completion_tokens: 15
|
| 31 |
temperature: 0.0
|
|
|
|
| 25 |
- type: nest
|
| 26 |
input_path: []
|
| 27 |
field_name: "requirement_check"
|
| 28 |
+
instruction: |-
|
| 29 |
+
<requirements>: {requirement}
|
| 30 |
+
Please verify if the assistant's generation satisfies the user's requirements or not and reply with a binary label accordingly. Respond with a json {{"score": "yes"}} if the constraints are satisfied or respond with {{"score": "no"}} if the constraints are not satisfied.
|
| 31 |
parameters:
|
| 32 |
max_completion_tokens: 15
|
| 33 |
temperature: 0.0
|
io_configs/uncertainty/io.yaml
CHANGED
|
@@ -35,8 +35,8 @@ transformations:
|
|
| 35 |
input_path: []
|
| 36 |
retained_fields:
|
| 37 |
score: "certainty"
|
| 38 |
-
instruction:
|
| 39 |
parameters:
|
| 40 |
max_completion_tokens: 15
|
| 41 |
temperature: 0.0
|
| 42 |
-
sentence_boundaries: ~
|
|
|
|
| 35 |
input_path: []
|
| 36 |
retained_fields:
|
| 37 |
score: "certainty"
|
| 38 |
+
instruction: <certainty>
|
| 39 |
parameters:
|
| 40 |
max_completion_tokens: 15
|
| 41 |
temperature: 0.0
|
| 42 |
+
sentence_boundaries: ~
|
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b153d175bf5a24225a5138dddd9501a77fb3595bc95255a5a0603e038a1eb68
|
| 3 |
+
size 4997144072
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06644fb2f44e8b96391b0fc9d5b27c627969d76efe162767d523b484d94e828b
|
| 3 |
+
size 4982910656
|
model.safetensors.index.json
CHANGED
|
@@ -1,10 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
"total_parameters": 9568112640,
|
| 4 |
-
"total_size":
|
| 5 |
},
|
| 6 |
"weight_map": {
|
| 7 |
-
"model.adapter_hiding_matrix": "model-00001-of-00004.safetensors",
|
| 8 |
"model.adapter_token_ids": "model-00001-of-00004.safetensors",
|
| 9 |
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
| 10 |
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
|
@@ -794,7 +793,7 @@
|
|
| 794 |
"model.layers.9.self_attn.o_proj.lora_B": "model-00002-of-00004.safetensors",
|
| 795 |
"model.layers.9.self_attn.qkv_proj.base_layer.weight": "model-00001-of-00004.safetensors",
|
| 796 |
"model.layers.9.self_attn.qkv_proj.lora_A_slices.0": "model-00001-of-00004.safetensors",
|
| 797 |
-
"model.layers.9.self_attn.qkv_proj.lora_A_slices.1": "model-
|
| 798 |
"model.layers.9.self_attn.qkv_proj.lora_A_slices.2": "model-00002-of-00004.safetensors",
|
| 799 |
"model.layers.9.self_attn.qkv_proj.lora_B_slices.0": "model-00002-of-00004.safetensors",
|
| 800 |
"model.layers.9.self_attn.qkv_proj.lora_B_slices.1": "model-00002-of-00004.safetensors",
|
|
@@ -808,6 +807,6 @@
|
|
| 808 |
"model.layers.9.shared_mlp.output_linear.lora_A": "model-00002-of-00004.safetensors",
|
| 809 |
"model.layers.9.shared_mlp.output_linear.lora_B": "model-00002-of-00004.safetensors",
|
| 810 |
"model.norm.weight": "model-00004-of-00004.safetensors",
|
| 811 |
-
"model.
|
| 812 |
}
|
| 813 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
"total_parameters": 9568112640,
|
| 4 |
+
"total_size": 19137028288
|
| 5 |
},
|
| 6 |
"weight_map": {
|
|
|
|
| 7 |
"model.adapter_token_ids": "model-00001-of-00004.safetensors",
|
| 8 |
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
| 9 |
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
|
|
|
| 793 |
"model.layers.9.self_attn.o_proj.lora_B": "model-00002-of-00004.safetensors",
|
| 794 |
"model.layers.9.self_attn.qkv_proj.base_layer.weight": "model-00001-of-00004.safetensors",
|
| 795 |
"model.layers.9.self_attn.qkv_proj.lora_A_slices.0": "model-00001-of-00004.safetensors",
|
| 796 |
+
"model.layers.9.self_attn.qkv_proj.lora_A_slices.1": "model-00002-of-00004.safetensors",
|
| 797 |
"model.layers.9.self_attn.qkv_proj.lora_A_slices.2": "model-00002-of-00004.safetensors",
|
| 798 |
"model.layers.9.self_attn.qkv_proj.lora_B_slices.0": "model-00002-of-00004.safetensors",
|
| 799 |
"model.layers.9.self_attn.qkv_proj.lora_B_slices.1": "model-00002-of-00004.safetensors",
|
|
|
|
| 807 |
"model.layers.9.shared_mlp.output_linear.lora_A": "model-00002-of-00004.safetensors",
|
| 808 |
"model.layers.9.shared_mlp.output_linear.lora_B": "model-00002-of-00004.safetensors",
|
| 809 |
"model.norm.weight": "model-00004-of-00004.safetensors",
|
| 810 |
+
"model.switch.control_to_substitute_lut": "model-00001-of-00004.safetensors"
|
| 811 |
}
|
| 812 |
}
|