antonpuz committed on
Commit
c739556
·
verified ·
1 Parent(s): 6cdcd19

Updating GraniteSwitch 4.1 8b

Browse files
BUILD.md CHANGED
@@ -35,10 +35,21 @@ Total adapters: **12**
35
  - composed_param_count: 9,568,112,640
36
  - Param delta: +14.17%
37
  - compose_settings:
38
- - exclude_adapters:
39
- - "context_relevance"
 
 
 
 
 
 
 
 
 
 
 
40
  - target_model: "granite-4.1-8b"
41
  - adapter_sources:
42
- - "ibm-granite/granitelib-rag-r1.0": "6e4a75e35f1cb272e8d15b4615fb0a123398d1cf"
43
- - "ibm-granite/granitelib-guardian-r1.0": "882ccf11cf1e4cdc3a66044f17872e55078dbc85"
44
- - "ibm-granite/granitelib-core-r1.0": "8f78babf3f0d5baba230464838050a71fe59dee5"
 
35
  - composed_param_count: 9,568,112,640
36
  - Param delta: +14.17%
37
  - compose_settings:
38
+ - adapter_substitute_token_ids:
39
+ - 100264
40
+ - 100264
41
+ - 100264
42
+ - 100264
43
+ - 100264
44
+ - 27
45
+ - 27
46
+ - 27
47
+ - 27
48
+ - 27
49
+ - 27
50
+ - 100264
51
  - target_model: "granite-4.1-8b"
52
  - adapter_sources:
53
+ - "ibm-granite/granitelib-rag-r1.0": "2f0b2c79c6731068625aca8045c2eb2e8912b353"
54
+ - "ibm-granite/granitelib-guardian-r1.0": "773b254e98f993a605ec4b6259634906e0e64e8e"
55
+ - "ibm-granite/granitelib-core-r1.0": "d0a2a96a4cd07e96f0fe7ca29a42bfe088299d43"
chat_template.jinja CHANGED
@@ -38,7 +38,8 @@
38
  adapter_token=adapter_token,
39
  adapter_type=adapter_type,
40
  adapter_invocation_text=adapter_invocation_text,
41
- alora_target_idx=-1
 
42
  ) %}
43
  {%- if tools %}
44
  {%- for tool in tools %}
@@ -59,6 +60,7 @@
59
  {#- For lora adapters: insert activation token at the very beginning -#}
60
  {%- if ns.adapter_token and ns.adapter_type == 'lora' %}
61
  {{- ns.adapter_token }}
 
62
  {%- endif %}
63
 
64
  {%- if messages[0].role == 'system' %}
@@ -91,7 +93,8 @@
91
  {%- endif %}
92
  {%- endif %}
93
  {%- if ns.system_message %}
94
- {{- '<|start_of_role|>system<|end_of_role|>' + ns.system_message + '<|end_of_text|>\n' }}
 
95
  {%- endif %}
96
  {#- ALoRA Pass 1: find the last user message containing the invocation text.
97
  ns.alora_target_idx stays -1 when the invocation sequence is the assistant role
@@ -129,17 +132,22 @@
129
  {%- endfor %}
130
  {%- endif %}
131
  {%- endif %}
132
- {#- ALoRA Pass 2: inject activation token before invocation text in the target message -#}
 
133
  {%- if loop.index0 == ns.alora_target_idx %}
134
  {%- set _parts = content.val.rsplit(ns.adapter_invocation_text, 1) %}
135
  {%- if _parts | length > 1 %}
136
- {%- set content.val = _parts[0] + ns.adapter_token + ns.adapter_invocation_text + _parts[1] %}
137
  {%- endif %}
138
  {%- endif %}
139
  {%- if (message.role == 'user') or (message.role == 'system' and not loop.first) %}
140
- {{- '<|start_of_role|>' + message.role + '<|end_of_role|>' + content.val + '<|end_of_text|>\n' }}
 
 
141
  {%- elif message.role == 'assistant' %}
142
- {{- '<|start_of_role|>' + message.role + '<|end_of_role|>' + content.val }}
 
 
143
  {%- if message.tool_calls %}
144
  {%- for tool_call in message.tool_calls %}
145
  {%- if (loop.first and content.val) or (not loop.first) %}
@@ -162,7 +170,8 @@
162
  {{- '<|end_of_text|>\n' }}
163
  {%- elif message.role == 'tool' %}
164
  {%- if loop.first or (messages[loop.index0 - 1].role != 'tool') %}
165
- {{- '<|start_of_role|>user<|end_of_role|>' }}
 
166
  {%- endif %}
167
  {{- '\n<tool_response>\n' }}
168
  {{- content.val }}
@@ -178,7 +187,9 @@
178
  role token boundary rather than inside a user message. -#}
179
  {%- if ns.adapter_token and ns.adapter_type == 'alora' and ns.alora_target_idx == -1 %}
180
  {{- ns.adapter_token }}
 
181
  {%- endif %}
182
  {%- if add_generation_prompt %}
183
- {{- '<|start_of_role|>assistant<|end_of_role|>' }}
 
184
  {%- endif %}
 
38
  adapter_token=adapter_token,
39
  adapter_type=adapter_type,
40
  adapter_invocation_text=adapter_invocation_text,
41
+ alora_target_idx=-1,
42
+ skip_next_start_of_role=false
43
  ) %}
44
  {%- if tools %}
45
  {%- for tool in tools %}
 
60
  {#- For lora adapters: insert activation token at the very beginning -#}
61
  {%- if ns.adapter_token and ns.adapter_type == 'lora' %}
62
  {{- ns.adapter_token }}
63
+ {%- set ns.skip_next_start_of_role = true %}
64
  {%- endif %}
65
 
66
  {%- if messages[0].role == 'system' %}
 
93
  {%- endif %}
94
  {%- endif %}
95
  {%- if ns.system_message %}
96
+ {%- if ns.skip_next_start_of_role %}{%- set ns.skip_next_start_of_role = false %}{%- else %}{{- '<|start_of_role|>' }}{%- endif %}
97
+ {{- 'system<|end_of_role|>' + ns.system_message + '<|end_of_text|>\n' }}
98
  {%- endif %}
99
  {#- ALoRA Pass 1: find the last user message containing the invocation text.
100
  ns.alora_target_idx stays -1 when the invocation sequence is the assistant role
 
132
  {%- endfor %}
133
  {%- endif %}
134
  {%- endif %}
135
+ {#- ALoRA Pass 2: inject activation token AND drop the first char of
136
+ the invocation text so the runtime-swapped embedding doesn't duplicate it. -#}
137
  {%- if loop.index0 == ns.alora_target_idx %}
138
  {%- set _parts = content.val.rsplit(ns.adapter_invocation_text, 1) %}
139
  {%- if _parts | length > 1 %}
140
+ {%- set content.val = _parts[0] + ns.adapter_token + ns.adapter_invocation_text[1:] + _parts[1] %}
141
  {%- endif %}
142
  {%- endif %}
143
  {%- if (message.role == 'user') or (message.role == 'system' and not loop.first) %}
144
+ {%- if ns.skip_next_start_of_role %}{%- set ns.skip_next_start_of_role = false %}{%- else %}{%- if ns.skip_next_start_of_role %}{%- set ns.skip_next_start_of_role = false %}{%- else %}{{- '<|start_of_role|>' }}{%- endif %}
145
+ {{- '' }}{%- endif %}
146
+ {{- message.role + '<|end_of_role|>' + content.val + '<|end_of_text|>\n' }}
147
  {%- elif message.role == 'assistant' %}
148
+ {%- if ns.skip_next_start_of_role %}{%- set ns.skip_next_start_of_role = false %}{%- else %}{%- if ns.skip_next_start_of_role %}{%- set ns.skip_next_start_of_role = false %}{%- else %}{{- '<|start_of_role|>' }}{%- endif %}
149
+ {{- '' }}{%- endif %}
150
+ {{- message.role + '<|end_of_role|>' + content.val }}
151
  {%- if message.tool_calls %}
152
  {%- for tool_call in message.tool_calls %}
153
  {%- if (loop.first and content.val) or (not loop.first) %}
 
170
  {{- '<|end_of_text|>\n' }}
171
  {%- elif message.role == 'tool' %}
172
  {%- if loop.first or (messages[loop.index0 - 1].role != 'tool') %}
173
+ {%- if ns.skip_next_start_of_role %}{%- set ns.skip_next_start_of_role = false %}{%- else %}{{- '<|start_of_role|>' }}{%- endif %}
174
+ {{- 'user<|end_of_role|>' }}
175
  {%- endif %}
176
  {{- '\n<tool_response>\n' }}
177
  {{- content.val }}
 
187
  role token boundary rather than inside a user message. -#}
188
  {%- if ns.adapter_token and ns.adapter_type == 'alora' and ns.alora_target_idx == -1 %}
189
  {{- ns.adapter_token }}
190
+ {%- set ns.skip_next_start_of_role = true %}
191
  {%- endif %}
192
  {%- if add_generation_prompt %}
193
+ {%- if ns.skip_next_start_of_role %}{%- set ns.skip_next_start_of_role = false %}{%- else %}{{- '<|start_of_role|>' }}{%- endif %}
194
+ {{- 'assistant<|end_of_role|>' }}
195
  {%- endif %}
compose_report.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "timestamp": "2026-05-03T16:10:19.726075"
4
  },
5
  "base_model_mapping": [
6
  {
 
1
  {
2
  "metadata": {
3
+ "timestamp": "2026-05-21T05:14:40.268526"
4
  },
5
  "base_model_mapping": [
6
  {
config.json CHANGED
@@ -27,19 +27,19 @@
27
  16,
28
  16
29
  ],
30
- "adapter_third_party": [
31
- "citations",
32
- "query_rewrite",
33
- "query_clarification",
34
- "hallucination_detection",
35
- "answerability",
36
- "factuality-detection",
37
- "policy-guardrails",
38
- "factuality-correction",
39
- "guardian-core",
40
- "uncertainty",
41
- "requirement-check",
42
- "context-attribution"
43
  ],
44
  "adapter_token_ids": [
45
  100352,
@@ -62,7 +62,6 @@
62
  "attention_dropout": 0.0,
63
  "attention_multiplier": 0.0078125,
64
  "bos_token_id": 100257,
65
- "control_dims": 32,
66
  "control_token_gain": 15.0,
67
  "dtype": "bfloat16",
68
  "embedding_multiplier": 12.0,
@@ -70,63 +69,6 @@
70
  "fused_add_norm": false,
71
  "hidden_act": "silu",
72
  "hidden_size": 4096,
73
- "hiding_groups": {
74
- "all_controls": [
75
- "citations",
76
- "query_rewrite",
77
- "query_clarification",
78
- "hallucination_detection",
79
- "answerability",
80
- "factuality-detection",
81
- "policy-guardrails",
82
- "factuality-correction",
83
- "guardian-core",
84
- "uncertainty",
85
- "requirement-check",
86
- "context-attribution"
87
- ]
88
- },
89
- "hiding_policy": {
90
- "answerability": [
91
- "all_controls"
92
- ],
93
- "base": [
94
- "all_controls"
95
- ],
96
- "citations": [
97
- "all_controls"
98
- ],
99
- "context-attribution": [
100
- "all_controls"
101
- ],
102
- "factuality-correction": [
103
- "all_controls"
104
- ],
105
- "factuality-detection": [
106
- "all_controls"
107
- ],
108
- "guardian-core": [
109
- "all_controls"
110
- ],
111
- "hallucination_detection": [
112
- "all_controls"
113
- ],
114
- "policy-guardrails": [
115
- "all_controls"
116
- ],
117
- "query_clarification": [
118
- "all_controls"
119
- ],
120
- "query_rewrite": [
121
- "all_controls"
122
- ],
123
- "requirement-check": [
124
- "all_controls"
125
- ],
126
- "uncertainty": [
127
- "all_controls"
128
- ]
129
- },
130
  "initializer_range": 0.1,
131
  "intermediate_size": 12800,
132
  "layer_types": [
 
27
  16,
28
  16
29
  ],
30
+ "adapter_substitute_token_ids": [
31
+ 100264,
32
+ 100264,
33
+ 100264,
34
+ 100264,
35
+ 100264,
36
+ 27,
37
+ 27,
38
+ 27,
39
+ 27,
40
+ 27,
41
+ 27,
42
+ 100264
43
  ],
44
  "adapter_token_ids": [
45
  100352,
 
62
  "attention_dropout": 0.0,
63
  "attention_multiplier": 0.0078125,
64
  "bos_token_id": 100257,
 
65
  "control_token_gain": 15.0,
66
  "dtype": "bfloat16",
67
  "embedding_multiplier": 12.0,
 
69
  "fused_add_norm": false,
70
  "hidden_act": "silu",
71
  "hidden_size": 4096,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  "initializer_range": 0.1,
73
  "intermediate_size": 12800,
74
  "layer_types": [
io_configs/factuality-correction/io.yaml CHANGED
@@ -14,10 +14,16 @@ response_format: |
14
  "required": ["correction"]
15
  }
16
  transformations: ~
17
- instruction: ~
 
 
 
 
 
 
18
  parameters:
19
  # corrected response can be several hundred tokens at high temperatures
20
  max_completion_tokens: 4096
21
  temperature: 0.0
22
  # No sentence boundary detection
23
- sentence_boundaries: ~
 
14
  "required": ["correction"]
15
  }
16
  transformations: ~
17
+ instruction: |2
18
+
19
+ <guardian>As a judge agent, your role is to help assess whether the provided text meets the given judging criteria, utilizing all available information, including conversations, documents, and tools.
20
+
21
+ ### Criteria: A factually incorrect response occurs when the assistant's message contains one or more factual claims that are unsupported by, inconsistent with, or directly contradicted by the information provided in the documents or context. This includes situations where the assistant: introduces details not grounded in the context, misstates or distorts facts contained within the context, misinterprets the meaning or implications of the context, supplies erroneous or conflicting information relative to the context. Even if only a small portion of the response contains such inaccuracies, the overall message is considered factually incorrect.
22
+
23
+ ### Scoring Schema: If the last assistant's text meets the criteria, return a corrected version of the assistant's message based on the given context; otherwise, return 'none'.
24
  parameters:
25
  # corrected response can be several hundred tokens at high temperatures
26
  max_completion_tokens: 4096
27
  temperature: 0.0
28
  # No sentence boundary detection
29
+ sentence_boundaries: ~
io_configs/factuality-detection/io.yaml CHANGED
@@ -16,9 +16,15 @@ response_format: |
16
  "additionalProperties": false
17
  }
18
  transformations: ~
19
- instruction: ~
 
 
 
 
 
 
20
  parameters:
21
  max_completion_tokens: 20
22
  temperature: 0.0
23
  # No sentence boundary detection
24
- sentence_boundaries: ~
 
16
  "additionalProperties": false
17
  }
18
  transformations: ~
19
+ instruction: |2
20
+
21
+ <guardian>As a judge agent, your role is to help assess whether the provided text meets the given judging criteria, utilizing all available information, including conversations, documents, and tools.
22
+
23
+ ### Criteria: A factually incorrect response occurs when the assistant's message contains one or more factual claims that are unsupported by, inconsistent with, or directly contradicted by the information provided in the documents or context. This includes situations where the assistant: introduces details not grounded in the context, misstates or distorts facts contained within the context, misinterprets the meaning or implications of the context, supplies erroneous or conflicting information relative to the context. Even if only a small portion of the response contains such inaccuracies, the overall message is considered factually incorrect.
24
+
25
+ ### Scoring Schema: If the last assistant's text meets the criteria, return 'yes'; otherwise, return 'no'.
26
  parameters:
27
  max_completion_tokens: 20
28
  temperature: 0.0
29
  # No sentence boundary detection
30
+ sentence_boundaries: ~
io_configs/guardian-core/io.yaml CHANGED
@@ -25,7 +25,12 @@ transformations:
25
  - type: nest
26
  input_path: []
27
  field_name: "guardian"
28
- instruction: ~
 
 
 
 
 
29
  parameters:
30
  max_completion_tokens: 15
31
  temperature: 0.0
 
25
  - type: nest
26
  input_path: []
27
  field_name: "guardian"
28
+ instruction: |-
29
+ <guardian>As a judge agent, your role is to help assess whether the provided text meets the given judging criteria, utilizing all available information, including conversations, documents, and tools.
30
+
31
+ ### Criteria: {criteria}
32
+
33
+ ### Scoring Schema: {scoring_schema}
34
  parameters:
35
  max_completion_tokens: 15
36
  temperature: 0.0
io_configs/policy-guardrails/io.yaml CHANGED
@@ -16,7 +16,19 @@ response_format: |
16
  "additionalProperties": false
17
  }
18
  transformations: ~
19
- instruction: ~
 
 
 
 
 
 
 
 
 
 
 
 
20
  parameters:
21
  temperature: 0.0
22
  max_completion_tokens: 20
 
16
  "additionalProperties": false
17
  }
18
  transformations: ~
19
+ instruction: |-
20
+ <guardian> You are a compliance agent trying to help determine whether a scenario is compliant with a given policy.
21
+
22
+ ### Criteria: Policy: {policy_text}
23
+
24
+ ### Scoring Schema: Does the scenario described in the previous message comply with the policy? Compliance is determined based on the following guidelines:
25
+ <start_of_guidelines>:
26
+ - "Yes" if, given the scenario and the policy, it can be decided with certainty that the scenario complies with the policy
27
+ - "No" if, given the scenario and the policy, it can be decided with certainty that the scenario does not comply with the policy
28
+ - "Ambiguous" if, given the scenario and the policy, it is not possible to decide whether the scenario complies with, or violates, the policy and more information is needed to decide with certainty.
29
+ <end_of_guidelines>
30
+
31
+ Your answer must be either "Yes", "No", or "Ambiguous". You MUST return your answer as a valid JSON object with the key "label". For example, if your answer is "Yes", respond as "{{"label":"Yes"}}".
32
  parameters:
33
  temperature: 0.0
34
  max_completion_tokens: 20
io_configs/requirement-check/io.yaml CHANGED
@@ -25,7 +25,9 @@ transformations:
25
  - type: nest
26
  input_path: []
27
  field_name: "requirement_check"
28
- instruction: ~
 
 
29
  parameters:
30
  max_completion_tokens: 15
31
  temperature: 0.0
 
25
  - type: nest
26
  input_path: []
27
  field_name: "requirement_check"
28
+ instruction: |-
29
+ <requirements>: {requirement}
30
+ Please verify if the assistant's generation satisfies the user's requirements or not and reply with a binary label accordingly. Respond with a json {{"score": "yes"}} if the constraints are satisfied or respond with {{"score": "no"}} if the constraints are not satisfied.
31
  parameters:
32
  max_completion_tokens: 15
33
  temperature: 0.0
io_configs/uncertainty/io.yaml CHANGED
@@ -35,8 +35,8 @@ transformations:
35
  input_path: []
36
  retained_fields:
37
  score: "certainty"
38
- instruction: ~
39
  parameters:
40
  max_completion_tokens: 15
41
  temperature: 0.0
42
- sentence_boundaries: ~
 
35
  input_path: []
36
  retained_fields:
37
  score: "certainty"
38
+ instruction: <certainty>
39
  parameters:
40
  max_completion_tokens: 15
41
  temperature: 0.0
42
+ sentence_boundaries: ~
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e2e60a9d257ce24d7196c82ded9965b4a37b3f4d40b0dd9d2378e60ae2259aa
3
- size 4999587489
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b153d175bf5a24225a5138dddd9501a77fb3595bc95255a5a0603e038a1eb68
3
+ size 4997144072
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6364123669111c1a904ffe832a030df2c9539e61267e1e56a7c3f3d1bb2088a8
3
- size 4979764800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06644fb2f44e8b96391b0fc9d5b27c627969d76efe162767d523b484d94e828b
3
+ size 4982910656
model.safetensors.index.json CHANGED
@@ -1,10 +1,9 @@
1
  {
2
  "metadata": {
3
  "total_parameters": 9568112640,
4
- "total_size": 19136325753
5
  },
6
  "weight_map": {
7
- "model.adapter_hiding_matrix": "model-00001-of-00004.safetensors",
8
  "model.adapter_token_ids": "model-00001-of-00004.safetensors",
9
  "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
10
  "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
@@ -794,7 +793,7 @@
794
  "model.layers.9.self_attn.o_proj.lora_B": "model-00002-of-00004.safetensors",
795
  "model.layers.9.self_attn.qkv_proj.base_layer.weight": "model-00001-of-00004.safetensors",
796
  "model.layers.9.self_attn.qkv_proj.lora_A_slices.0": "model-00001-of-00004.safetensors",
797
- "model.layers.9.self_attn.qkv_proj.lora_A_slices.1": "model-00001-of-00004.safetensors",
798
  "model.layers.9.self_attn.qkv_proj.lora_A_slices.2": "model-00002-of-00004.safetensors",
799
  "model.layers.9.self_attn.qkv_proj.lora_B_slices.0": "model-00002-of-00004.safetensors",
800
  "model.layers.9.self_attn.qkv_proj.lora_B_slices.1": "model-00002-of-00004.safetensors",
@@ -808,6 +807,6 @@
808
  "model.layers.9.shared_mlp.output_linear.lora_A": "model-00002-of-00004.safetensors",
809
  "model.layers.9.shared_mlp.output_linear.lora_B": "model-00002-of-00004.safetensors",
810
  "model.norm.weight": "model-00004-of-00004.safetensors",
811
- "model.token_to_group_mask": "model-00001-of-00004.safetensors"
812
  }
813
  }
 
1
  {
2
  "metadata": {
3
  "total_parameters": 9568112640,
4
+ "total_size": 19137028288
5
  },
6
  "weight_map": {
 
7
  "model.adapter_token_ids": "model-00001-of-00004.safetensors",
8
  "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
9
  "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
 
793
  "model.layers.9.self_attn.o_proj.lora_B": "model-00002-of-00004.safetensors",
794
  "model.layers.9.self_attn.qkv_proj.base_layer.weight": "model-00001-of-00004.safetensors",
795
  "model.layers.9.self_attn.qkv_proj.lora_A_slices.0": "model-00001-of-00004.safetensors",
796
+ "model.layers.9.self_attn.qkv_proj.lora_A_slices.1": "model-00002-of-00004.safetensors",
797
  "model.layers.9.self_attn.qkv_proj.lora_A_slices.2": "model-00002-of-00004.safetensors",
798
  "model.layers.9.self_attn.qkv_proj.lora_B_slices.0": "model-00002-of-00004.safetensors",
799
  "model.layers.9.self_attn.qkv_proj.lora_B_slices.1": "model-00002-of-00004.safetensors",
 
807
  "model.layers.9.shared_mlp.output_linear.lora_A": "model-00002-of-00004.safetensors",
808
  "model.layers.9.shared_mlp.output_linear.lora_B": "model-00002-of-00004.safetensors",
809
  "model.norm.weight": "model-00004-of-00004.safetensors",
810
+ "model.switch.control_to_substitute_lut": "model-00001-of-00004.safetensors"
811
  }
812
  }