@@ -56,13 +56,31 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
56
56
const struct ggml_tensor * src0 = t->src [0 ];
57
57
const struct ggml_tensor * src1 = t->src [1 ];
58
58
59
+ std::string wname;
60
+ {
61
+ // remove any prefix and suffixes from the name
62
+ // CUDA0#blk.0.attn_k.weight#0 => blk.0.attn_k.weight
63
+ const char * p = strchr (src0->name , ' #' );
64
+ if (p != NULL ) {
65
+ p = p + 1 ;
66
+ const char * q = strchr (p, ' #' );
67
+ if (q != NULL ) {
68
+ wname = std::string (p, q - p);
69
+ } else {
70
+ wname = p;
71
+ }
72
+ } else {
73
+ wname = src0->name ;
74
+ }
75
+ }
76
+
59
77
// when ask is true, the scheduler wants to know if we are interested in data from this tensor
60
78
// if we return true, a follow-up call will be made with ask=false in which we can do the actual collection
61
79
if (ask) {
62
80
if (t->op == GGML_OP_MUL_MAT_ID) return true ; // collect all indirect matrix multiplications
63
81
if (t->op != GGML_OP_MUL_MAT) return false ;
64
82
if (src1->ne [1 ] < 16 || src1->type != GGML_TYPE_F32) return false ;
65
- if (!(strncmp (src0-> name , " blk. " , 4 ) == 0 || (m_params.collect_output_weight && strcmp (src0-> name , " output.weight" ) == 0 ))) return false ;
83
+ if (!(wname. substr ( 0 , 4 ) == " blk. " || (m_params.collect_output_weight && wname == " output.weight" ))) return false ;
66
84
return true ;
67
85
}
68
86
@@ -94,20 +112,20 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
94
112
// this is necessary to guarantee equal number of "ncall" for each tensor
95
113
for (int ex = 0 ; ex < n_as; ++ex) {
96
114
src0 = t->src [2 + ex];
97
- auto & e = m_stats[src0-> name ];
115
+ auto & e = m_stats[wname ];
98
116
if (e.values .empty ()) {
99
117
e.values .resize (src1->ne [0 ], 0 );
100
118
}
101
119
else if (e.values .size () != (size_t )src1->ne [0 ]) {
102
- fprintf (stderr, " Oops: inconsistent size for %s (%d vs %d)\n " , src0-> name , (int )e.values .size (), (int )src1->ne [0 ]);
120
+ fprintf (stderr, " Oops: inconsistent size for %s (%d vs %d)\n " , wname. c_str () , (int )e.values .size (), (int )src1->ne [0 ]);
103
121
exit (1 ); // GGML_ASSERT(false);
104
122
}
105
123
// NOTE: since we select top-k experts, the number of calls for the expert tensors will be k times larger
106
124
// using the following line, we can correct for that if needed
107
125
// if (idx == t->src[0]->ne[0] - 1) ++e.ncall;
108
126
++e.ncall ;
109
127
if (m_params.verbosity > 1 ) {
110
- printf (" %s[%d]: %32s, %s, %5d x %5d, %d\n " , __func__, m_last_call, src0-> name , ggml_op_name (t->op ), (int )src1->ne [0 ], (int )src1->ne [1 ], (int )src1->type );
128
+ printf (" %s[%d]: %32s, %s, %5d x %5d, %d\n " , __func__, m_last_call, wname. c_str () , ggml_op_name (t->op ), (int )src1->ne [0 ], (int )src1->ne [1 ], (int )src1->type );
111
129
}
112
130
for (int row = 0 ; row < (int )src1->ne [1 ]; ++row) {
113
131
const int excur = m_ids[row*n_as + idx];
@@ -129,17 +147,17 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
129
147
}
130
148
}
131
149
} else {
132
- auto & e = m_stats[src0-> name ];
150
+ auto & e = m_stats[wname ];
133
151
if (e.values .empty ()) {
134
152
e.values .resize (src1->ne [0 ], 0 );
135
153
}
136
154
else if (e.values .size () != (size_t )src1->ne [0 ]) {
137
- fprintf (stderr, " Oops: inconsistent size for %s (%d vs %d)\n " , src0-> name , (int )e.values .size (), (int )src1->ne [0 ]);
155
+ fprintf (stderr, " Oops: inconsistent size for %s (%d vs %d)\n " , wname. c_str () , (int )e.values .size (), (int )src1->ne [0 ]);
138
156
exit (1 ); // GGML_ASSERT(false);
139
157
}
140
158
++e.ncall ;
141
159
if (m_params.verbosity > 1 ) {
142
- printf (" %s[%d]: %32s, %s, %5d x %5d, %d\n " , __func__, m_last_call, src0-> name , ggml_op_name (t->op ), (int )src1->ne [0 ], (int )src1->ne [1 ], (int )src1->type );
160
+ printf (" %s[%d]: %32s, %s, %5d x %5d, %d\n " , __func__, m_last_call, wname. c_str () , ggml_op_name (t->op ), (int )src1->ne [0 ], (int )src1->ne [1 ], (int )src1->type );
143
161
}
144
162
for (int row = 0 ; row < (int )src1->ne [1 ]; ++row) {
145
163
const float * x = data + row * src1->ne [0 ];
0 commit comments