//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <stdio.h>
#include "CPUMatrix.h"
//#include "GPUMatrix.h"
//#include "GPUSparseMatrix.h"
#include <map>
#include <unordered_map>
#ifdef _WIN32
#ifdef MATH_EXPORTS
#define MATH_API __declspec(dllexport)
#else
#define MATH_API __declspec(dllimport)
#endif
#endif /* Linux - already defined in CPUMatrix.h */
namespace Microsoft { namespace MSR { namespace CNTK {
template <class ElemType>
class MATH_API CPUSparseMatrix : public BaseMatrix<ElemType>
{
typedef BaseMatrix<ElemType> Base;
using Base::m_numRows;
using Base::m_numCols;
using Base::m_sliceViewOffset;
using Base::SetBuffer;
using Base::HasExternalBuffer;
using Base::GetNumStorageRows;
using Base::SetNumStorageRows;
using Base::GetNumStorageCols;
using Base::SetNumStorageCols;
using Base::SetComputeDeviceId;
using Base::SetSizeAllocated;
using Base::GetSizeAllocated;
using Base::GetCompIndex;
using Base::SetCompIndex;
using Base::GetUnCompIndex;
using Base::SetUnCompIndex;
using Base::GetCompIndexSize;
using Base::SetCompIndexSize;
using Base::GetColIdx;
using Base::SetColIdx;
using Base::GetBlockSize;
using Base::SetBlockSize;
using Base::GetBlockIds;
using Base::SetBlockIds;
using Base::GetBlockIdShift;
using Base::SetBlockIdShift;
using Base::ZeroInit;
using Base::ZeroValues;
using Base::m_sob;
using Base::ShallowCopyFrom;
using Base::VerifyResizable;
public:
using Base::VerifyWritable;
using Base::GetComputeDeviceId;
using Base::Buffer;
using Base::GetNumRows;
using Base::GetNumCols;
using Base::GetNumElements;
using Base::OwnBuffer;
using Base::GetFormat;
using Base::SetFormat;
using Base::IsEmpty;
private:
void ZeroInit();
void CheckInit(const MatrixFormat format);
public:
explicit CPUSparseMatrix(const MatrixFormat format);
CPUSparseMatrix(const MatrixFormat format, const size_t numRows, const size_t numCols, const size_t size);
CPUSparseMatrix(const CPUSparseMatrix<ElemType>& deepCopyFrom); // copy constructor, deep copy
CPUSparseMatrix<ElemType>& operator=(const CPUSparseMatrix<ElemType>& deepCopyFrom); // assignment operator, deep copy
CPUSparseMatrix(CPUSparseMatrix<ElemType>&& moveFrom); // move constructor, shallow copy
CPUSparseMatrix<ElemType>& operator=(CPUSparseMatrix<ElemType>&& moveFrom); // move assignment operator, shallow copy
~CPUSparseMatrix();
public:
void SetValue(const size_t row, const size_t col, ElemType val);
//void SetValue(const CPUMatrix<ElemType>& /*val*/);
//void SetValue(const GPUMatrix<ElemType>& /*val*/);
void SetValue(const CPUSparseMatrix<ElemType>& /*val*/);
//void SetValue(const GPUSparseMatrix<ElemType>& /*val*/);
void MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val, size_t numColsPerMaskEntry);
CPUSparseMatrix<ElemType>& AssignOneHot(const CPUMatrix<ElemType>& a, vector<size_t>& shape, size_t axis);
CPUSparseMatrix<ElemType>& DoGatherColumnsOf(ElemType beta, const CPUMatrix<ElemType>& idx, const CPUSparseMatrix<ElemType>& a, ElemType alpha);
CPUSparseMatrix<ElemType>& DoScatterColumnsOf(ElemType beta, const CPUMatrix<ElemType>& idx, const CPUSparseMatrix<ElemType>& a, ElemType alpha);
size_t BufferSize() const
{
return GetSizeAllocated() * sizeof(ElemType);
}
ElemType* Data() const;
inline size_t GetNumElemAllocated() const
{
return GetSizeAllocated();
}
CPUSparseMatrix<ElemType> ColumnSlice(size_t startColumn, size_t numCols) const;
CPUMatrix<ElemType> CopyColumnSliceToDense(size_t startColumn, size_t numCols) const;
void AssignColumnSliceToDense(CPUMatrix<ElemType>& slice, size_t startColumn, size_t numCols) const;
CPUMatrix<ElemType> DiagonalToDense() const;
void SetGaussianRandomValue(const ElemType /*mean*/, const ElemType /*sigma*/, unsigned long /*seed*/)
{
NOT_IMPLEMENTED;
}
void SetMatrixFromCSCFormat(const CPUSPARSE_INDEX_TYPE* h_CSCCol, const CPUSPARSE_INDEX_TYPE* h_Row, const ElemType* h_Val,
const size_t nz, const size_t numRows, const size_t numCols);
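// Illustrative only (a hedged sketch, not part of the API): how the raw CSC arrays passed to
// SetMatrixFromCSCFormat describe a small matrix. Assume a hypothetical 3x3 matrix with nonzeros
// (0,0)=1, (2,0)=2, (1,2)=3:
//
//   CPUSPARSE_INDEX_TYPE cscCol[] = { 0, 2, 2, 3 }; // numCols+1 entries; cscCol[j+1]-cscCol[j] = nonzeros in column j
//   CPUSPARSE_INDEX_TYPE cscRow[] = { 0, 2, 1 };    // row index of each nonzero, stored column by column
//   float                cscVal[] = { 1, 2, 3 };    // nonzero values, in the same order as cscRow
//   CPUSparseMatrix<float> m(matrixFormatSparseCSC);
//   m.SetMatrixFromCSCFormat(cscCol, cscRow, cscVal, /*nz=*/ 3, /*numRows=*/ 3, /*numCols=*/ 3);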
void SetMatrixFromSBCFormat(const size_t* blockIds, const ElemType* val, const size_t numBlocks, const size_t numRows, const size_t numCols);
// Dense * Sparse -> Dense
static void MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix<ElemType>& lhs, const bool transposeA,
const CPUSparseMatrix<ElemType>& rhs, const bool transposeB, ElemType beta, CPUMatrix<ElemType>& c);
// Sparse * Dense -> Dense
static void MultiplyAndWeightedAdd(ElemType alpha, const CPUSparseMatrix<ElemType>& lhs, const bool transposeA,
const CPUMatrix<ElemType>& rhs, const bool transposeB, ElemType beta, CPUMatrix<ElemType>& c);
// Dense * Sparse -> Sparse
static void MultiplyAndAdd(ElemType alpha, const CPUMatrix<ElemType>& lhs, const bool transposeA,
const CPUSparseMatrix<ElemType>& rhs, const bool transposeB, CPUSparseMatrix<ElemType>& c);
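// The MultiplyAndWeightedAdd/MultiplyAndAdd overloads above follow a GEMM-style contract,
// i.e. roughly c = alpha * op(lhs) * op(rhs) + beta * c, where op() optionally transposes its
// argument (MultiplyAndAdd presumably accumulates into c without scaling it). A hedged usage
// sketch with hypothetical operand sizes:
//
//   CPUMatrix<float> lhs(128, 1000), c(128, 32);
//   CPUSparseMatrix<float> rhs(matrixFormatSparseCSC, 1000, 32, /*size=*/ 5000);
//   // c = 1.0 * lhs * rhs + 0.0 * c
//   CPUSparseMatrix<float>::MultiplyAndWeightedAdd(1.0f, lhs, false, rhs, false, 0.0f, c);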
static void ColumnwiseScaleAndWeightedAdd(ElemType alpha, const CPUSparseMatrix<ElemType>& a, const CPUMatrix<ElemType>& v, ElemType beta, CPUMatrix<ElemType>& c);
static void ScaleAndAdd(const ElemType alpha, const CPUSparseMatrix<ElemType>& lhs, CPUMatrix<ElemType>& c);
static bool AreEqual(const CPUSparseMatrix<ElemType>& a, const CPUSparseMatrix<ElemType>& b, const ElemType threshold = 1e-8);
// sum(vec(a).*vec(b))
static ElemType InnerProductOfMatrices(const CPUSparseMatrix<ElemType>& /*a*/, const CPUMatrix<ElemType>& /*b*/)
{
NOT_IMPLEMENTED;
}
static void InnerProduct(const CPUSparseMatrix<ElemType>& a, const CPUMatrix<ElemType>& b, CPUMatrix<ElemType>& c, const bool isColWise);
static void AddScaledDifference(const ElemType /*alpha*/, const CPUSparseMatrix<ElemType>& /*a*/, const CPUMatrix<ElemType>& /*b*/, CPUMatrix<ElemType>& /*c*/,
bool /*bDefaultZero*/)
{
NOT_IMPLEMENTED;
}
static void AddScaledDifference(const ElemType /*alpha*/, const CPUMatrix<ElemType>& /*a*/, const CPUSparseMatrix<ElemType>& /*b*/, CPUMatrix<ElemType>& /*c*/,
bool /*bDefaultZero*/)
{
NOT_IMPLEMENTED;
}
int GetComputeDeviceId() const
{
return -1;
}
// Allocate actually allocates the storage space for numNZElemToReserve elements. This is different from resizing, which changes the dimensions of the underlying matrix.
// Unfortunately, numRows/numCols still need to be passed in for some matrix formats (e.g., SparseCSC), because part of the allocated storage depends on the
// dimensions of the matrix.
void Allocate(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve = 10000, const bool growOnly = true, bool keepExistingValues = false); // matrix format will affect the size to allocate
// RequireSizeAndAllocate is required by SparseMatrix since resizing the dimensions and allocating storage are different operations. Since a Resize can entail changing
// MatrixFormat, we offer an overload for that case.
void RequireSizeAndAllocate(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly = true, bool keepExistingValues = true); // matrix format will affect the size to allocate
// Otherwise we will just use the current MatrixFormat.
void RequireSizeAndAllocate(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve = 10000, const bool growOnly = true, bool keepExistingValues = false);
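// A minimal usage sketch (hypothetical sizes): set the logical dimensions and reserve storage for
// the expected number of nonzeros in one call, rather than calling RequireSize and Allocate separately.
//
//   CPUSparseMatrix<float> m(matrixFormatSparseCSC);
//   m.RequireSizeAndAllocate(/*numRows=*/ 10000, /*numCols=*/ 512, /*numNZElemToReserve=*/ 50000);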
// Sparse matrix RequireSize is similar to dense matrix RequireSize in that it will only allocate the minimum amount of storage required to successfully create the matrix.
// This is required because some formats (e.g., SparseCSC) require the SecondaryIndexLocation to have valid data in order to compute m_nz. Otherwise this method would not
// update the storage at all.
void RequireSize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat format, const bool growOnly = true);
void RequireSize(const size_t numRows, const size_t numCols, const MatrixFormat format, const bool growOnly = true)
{
return RequireSize(numRows, numCols, 0, format, growOnly);
}
// Allows RequireSize to be called without modifying the MatrixFormat.
void RequireSize(const size_t numRows, const size_t numCols, const bool growOnly = true);
// Resizes the dimensions of the underlying sparse matrix object. Since the caller may have a hint for m_nz, we allow that to be passed, but this is terrible design. In the
// future if we want better separation between allocation and resizing, this interface should be updated to be less clunky.
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly = true); // matrix format will affect the size to allocate
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve = 10000, const bool growOnly = true);
void Reset();
const ElemType operator()(const size_t row, const size_t col) const
{
    if (col >= m_numCols || row >= m_numRows)
    {
        RuntimeError("Position outside matrix dimensions");
    }

    if (GetFormat() == MatrixFormat::matrixFormatSparseCSC)
    {
        size_t start = SecondaryIndexLocation()[col];
        size_t end = SecondaryIndexLocation()[col + 1];
        for (size_t p = start; p < end; p++)
        {
            size_t i = MajorIndexLocation()[p];
            if (i == row)
            {
                return ((ElemType*)Buffer())[p];
            }
        }
        return 0;
    }
    else if (GetFormat() == MatrixFormat::matrixFormatSparseBlockCol)
    {
        for (size_t blockId = 0; blockId < GetBlockSize(); blockId++)
        {
            size_t blockCol = GetBlockIds()[blockId] - GetBlockIdShift();
            if (blockCol == col)
            {
                return ((ElemType*)Buffer())[blockId * GetNumRows() + row];
            }
        }
        return 0;
    }

    NOT_IMPLEMENTED;
}
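// Note that the element accessor above performs a linear search within the column (CSC) or over the
// allocated blocks (block-col), so random reads are not O(1). E.g. (a hypothetical read):
//   float v = m(5, 12); // returns 0 if no entry is stored at (5, 12)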
public:
void NormalGrad(CPUMatrix<ElemType>& c, const ElemType momentum, bool unitGainMomentum = true);
ElemType Adagrad(CPUMatrix<ElemType>& c, const bool needAveMultiplier);
void AdaDelta(CPUMatrix<ElemType>& c, CPUMatrix<ElemType>& functionValues, ElemType learningRate, ElemType rho, ElemType epsilon);
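// For orientation only: the standard AdaDelta rule (Zeiler 2012) that this update is expected to
// follow; the exact mapping of c / functionValues to the accumulators below is defined in
// CPUSparseMatrix.cpp, so treat this as a sketch rather than the authoritative formula.
//
//   E[g^2]  <- rho * E[g^2]  + (1 - rho) * g^2
//   dx       = - (sqrt(E[dx^2] + epsilon) / sqrt(E[g^2] + epsilon)) * g
//   E[dx^2] <- rho * E[dx^2] + (1 - rho) * dx^2
//   x       <- x + learningRate * dx   // classic AdaDelta uses no learning rate; this signature takes one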
public:
CPUSparseMatrix<ElemType>& InplaceTruncateTop(const ElemType threshold);
CPUSparseMatrix<ElemType>& InplaceTruncateBottom(const ElemType threshold);
CPUSparseMatrix<ElemType>& InplaceTruncate(const ElemType threshold);
CPUSparseMatrix<ElemType>& InplaceSoftThreshold(const ElemType threshold);
ElemType FrobeniusNorm() const; // useful for comparing CPU and GPU results
ElemType SumOfAbsElements() const; // sum of all abs(elements)
ElemType SumOfElements() const; // sum of all elements
public:
void Print(const char* matrixName, ptrdiff_t rowStart, ptrdiff_t rowEnd, ptrdiff_t colStart, ptrdiff_t colEnd) const;
void Print(const char* matrixName = NULL) const; // print whole matrix. can be expensive
public:
const ElemType* NzValues() const
{
return Data();
}
inline ElemType* NzValues()
{
return Data();
}
size_t NzCount() const
{
    if (GetFormat() == matrixFormatSparseCSC)
        return GetCompIndex()[GetNumCols()] - GetCompIndex()[0];
    else if (GetFormat() == matrixFormatSparseCSR)
        return GetCompIndex()[GetNumRows()] - GetCompIndex()[0];
    else if (GetFormat() == matrixFormatSparseBlockCol)
        return GetBlockSize() * GetNumRows();
    else
        NOT_IMPLEMENTED;
}
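// Worked example: for the hypothetical CSC sketch near SetMatrixFromCSCFormat above (compressed column
// index {0, 2, 2, 3}, 3 columns), NzCount() returns GetCompIndex()[3] - GetCompIndex()[0] = 3 - 0 = 3.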
size_t NzSize() const
{
return sizeof(ElemType) * NzCount();
} // actual number of element bytes in use
void SetBlockSize(size_t newBlockSize)
{
BaseMatrix<ElemType>::SetBlockSize(newBlockSize);
}
size_t GetBlockSize() const
{
return BaseMatrix<ElemType>::GetBlockSize();
}
size_t* BlockIdsLocation() const
{
if ((GetFormat() != matrixFormatSparseBlockCol) && (GetFormat() != matrixFormatSparseBlockRow))
LogicError("CPUSparseMatrix::BlockIdsLocation is only applicable to sparse block formats");
return GetBlockIds();
}
CPUSPARSE_INDEX_TYPE* MajorIndexLocation() const
{
    return (GetUnCompIndex() +
            ((GetFormat() == matrixFormatSparseCSC || GetFormat() == matrixFormatSparseCSR) ? GetCompIndex()[m_sliceViewOffset] : 0));
} // this is the major index, row/col ids in CSC/CSR format
size_t MajorIndexCount() const
{
return NzCount();
}
size_t MajorIndexSize() const
{
return sizeof(CPUSPARSE_INDEX_TYPE) * MajorIndexCount();
} // actual number of major index bytes in use
// Returns the start of the secondary index valid for the slice-view.
// The secondary index provides the offset into the data buffer for the values.
// E.g., for CSC the first nonzero value of column k is Buffer()[SecondaryIndexLocation()[k]].
CPUSPARSE_INDEX_TYPE* SecondaryIndexLocation() const
{
return GetCompIndex() + m_sliceViewOffset;
}
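// Worked example, continuing the hypothetical CSC sketch above: SecondaryIndexLocation()[k] and [k+1]
// bracket the nonzeros of column k, so column 0 occupies entries [0, 2) of Buffer()/MajorIndexLocation(),
// column 1 is empty ([2, 2)), and column 2 occupies [2, 3).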
size_t SecondaryIndexCount() const
{
    if (GetFormat() & matrixFormatCompressed)
    {
        size_t cnt = (GetFormat() & matrixFormatRowMajor) ? m_numRows : m_numCols;
        if (cnt > 0)
            cnt++; // add an extra element on the end for the "max" value
        return cnt;
    }
    else
        return NzCount(); // COO format
}
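// E.g., a 3x3 CSC matrix has SecondaryIndexCount() == numCols + 1 == 4 (the compressed column index),
// while a 3x3 CSR matrix would have numRows + 1 == 4.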
// get size for compressed index
size_t SecondaryIndexSize() const
{
return (SecondaryIndexCount()) * sizeof(CPUSPARSE_INDEX_TYPE);
}
// the column and row locations will swap based on what format we are in. Full index always follows the data array
CPUSPARSE_INDEX_TYPE* RowLocation() const
{
return (GetFormat() & matrixFormatRowMajor) ? SecondaryIndexLocation() : MajorIndexLocation();
}
size_t RowSize() const
{
return (GetFormat() & matrixFormatRowMajor) ? SecondaryIndexSize() : MajorIndexSize();
}
CPUSPARSE_INDEX_TYPE* ColLocation() const
{
return (GetFormat() & matrixFormatRowMajor) ? MajorIndexLocation() : SecondaryIndexLocation();
}
size_t ColSize() const
{
return (GetFormat() & matrixFormatRowMajor) ? MajorIndexSize() : SecondaryIndexSize();
} // actual number of bytes in use
};
typedef CPUSparseMatrix<float> CPUSingleSparseMatrix;
typedef CPUSparseMatrix<double> CPUDoubleSparseMatrix;
} } }