forked from microsoft/CNTK
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHTKDeserializer.h
92 lines (67 loc) · 3.22 KB
/
HTKDeserializer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include "DataDeserializerBase.h"
#include "Config.h"
#include "CorpusDescriptor.h"
#include "UtteranceDescription.h"
#include "HTKChunkDescription.h"
#include "ConfigHelper.h"
#include <boost/noncopyable.hpp>
namespace Microsoft { namespace MSR { namespace CNTK {
// Deserializer for HTK feature data.
// Provides a set of chunks/sequences to the upper layers through the
// DataDeserializerBase interface. Instances cannot be copied (private
// boost::noncopyable base).
class HTKDeserializer : public DataDeserializerBase, private boost::noncopyable
{
public:
    // Creates a deserializer from the new-style configuration.
    //   corpus  - corpus descriptor, kept in m_corpus.
    //   config  - configuration of this deserializer.
    //   primary - whether the deserializer runs in primary mode.
    HTKDeserializer(CorpusDescriptorPtr corpus, const ConfigParameters& config, bool primary);

    // Creates a deserializer from a legacy-style configuration.
    //   featureConfig - legacy configuration section describing the feature stream.
    //   featureName   - name of the feature stream.
    // TODO: Should be removed when the legacy config goes away.
    HTKDeserializer(CorpusDescriptorPtr corpus, const ConfigParameters& featureConfig, const std::wstring& featureName, bool primary);

    // Gets descriptions of all chunks exposed by this deserializer.
    virtual ChunkDescriptions GetChunkDescriptions() override;

    // Gets descriptions of the sequences of a particular chunk.
    // The descriptions are delivered through 'result'.
    virtual void GetSequencesForChunk(ChunkIdType chunkId, std::vector<SequenceDescription>& result) override;

    // Retrieves the data of the chunk with the given id.
    virtual ChunkPtr GetChunk(ChunkIdType chunkId) override;

    // Gets the description of the sequence that corresponds to the given
    // description from the primary deserializer; the description is delivered
    // through 'result'.
    // NOTE(review): the return value presumably tells whether a matching
    // sequence exists - confirm against the implementation.
    virtual bool GetSequenceDescription(const SequenceDescription& primary, SequenceDescription& result) override;

private:
    class HTKChunk;

    // Initialization functions.
    void InitializeChunkDescriptions(const std::vector<std::string>& paths);
    void InitializeStreams(const std::wstring& featureName);
    void InitializeFeatureInformation();
    void InitializeAugmentationWindow(const std::pair<size_t, size_t>& augmentationWindow);

    // Gets a sequence by its chunk id and its id inside the chunk.
    // The sequence data is delivered through 'result'.
    void GetSequenceById(ChunkIdType chunkId, size_t id, std::vector<SequenceDataPtr>& result);

    // NOTE: every scalar member below carries a default member initializer so
    // that no member is ever left with an indeterminate value. The defaults are
    // placeholders only; the constructors (defined outside this header) are
    // expected to assign the configured values.

    // Dimension of features.
    size_t m_dimension = 0;

    // Type of the features (value-initialized until set).
    ElementType m_elementType{};

    // Chunk descriptions.
    std::vector<HTKChunkDescription> m_chunks;

    // Augmentation window.
    std::pair<size_t, size_t> m_augmentationWindow;

    // Corpus descriptor passed to the constructor.
    CorpusDescriptorPtr m_corpus;

    // General configuration: verbosity level.
    int m_verbosity = 0;

    // Total number of frames.
    size_t m_totalNumberOfFrames = 0;

    // Flag that indicates whether single speech frames should be exposed as sequences.
    bool m_frameMode = false;

    // Used to correlate a sequence key with the sequence inside the chunk when
    // the deserializer is not running in primary mode.
    // Key -> <chunk id, offset inside chunk>
    std::map<size_t, std::pair<size_t, size_t>> m_keyToChunkLocation;

    // Auxiliary data for checking against the data in the feature file.
    unsigned int m_samplePeriod = 0;
    size_t m_ioFeatureDimension = 0;
    std::string m_featureKind;

    // A flag that indicates whether the utterance should be extended to match
    // the length of the utterance from the primary deserializer.
    // TODO: This should be moved to the packers when deserializers work in sequence mode only.
    bool m_expandToPrimary = false;
};
// Shared-ownership handle to an HTKDeserializer.
using HTKDeserializerPtr = std::shared_ptr<HTKDeserializer>;
}}}