Skip to content

Commit

Permalink
added pl vmat src dst.pmat --force_float option to force the destinat…
Browse files Browse the repository at this point in the history
…ion file to be in float format. This allows saving disk space but could lose precision depending on the src dataset.
  • Loading branch information
nouiz committed Jul 8, 2009
1 parent 48f7a12 commit f6afcd9
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 22 deletions.
3 changes: 2 additions & 1 deletion commands/PLearnCommands/VMatCommand.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ VMatCommand::VMatCommand():
" ( will work only if your executable includes commands/PLearnCommands/VMatViewCommand.h )\n"
" or: vmat stats <dataset> \n"
" Will display basic statistics for each field \n"
" or: vmat convert <source> <destination> [--cols=col1,col2,col3,...] [--mat_to_mem] [--save_vmat]\n"
" or: vmat convert <source> <destination> [--cols=col1,col2,col3,...] [--mat_to_mem] [--save_vmat] [--force_float]\n"
" To convert any dataset into a .amat, .pmat, .dmat, .vmat, .csv or .arff format. \n"
" The extension of the destination is used to determine the format you want. \n"
" If the option --cols is specified, it requests to keep only the given columns\n"
Expand All @@ -82,6 +82,7 @@ VMatCommand::VMatCommand():
" If the option --save_vmat is specified, we save the source vmat in the destination metadatadir\n"
" If the option --update is specified, we generate the <destination> only when the <source> file is newer\n"
" then the destination file or when the destination file is missing\n"
" If .pmat is specified as the destination file, the option --force_float will save the data in float format\n"
" If .csv (Comma-Separated Value) is specified as the destination file, the \n"
" following additional options are also supported:\n"
" --skip-missings: if a row (after selecting the appropriate columns) contains\n"
Expand Down
17 changes: 13 additions & 4 deletions plearn/misc/vmatmain.cc
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,7 @@ int vmatmain(int argc, char** argv)
{
if(argc<4)
PLERROR("Usage: vmat convert <source> <destination> "
"[--mat_to_mem] [--cols=col1,col2,col3,...] [--save_vmat] [--skip-missings] [--precision=N] [--delimiter=CHAR]");
"[--mat_to_mem] [--cols=col1,col2,col3,...] [--save_vmat] [--skip-missings] [--precision=N] [--delimiter=CHAR] [--force_float]");

string source = argv[2];
string destination = argv[3];
Expand Down Expand Up @@ -689,6 +689,8 @@ int vmatmain(int argc, char** argv)
* --update
* :: we generate the <destination> only when the <source> file is newer than
* :: the destination file or when the destination file is missing
* --force_float
* :: if the destination is a pmat, we force the pmat file to be in float format
*/
TVec<string> columns;
TVec<string> date_columns;
Expand All @@ -698,6 +700,10 @@ int vmatmain(int argc, char** argv)
bool convert_date = false;
bool save_vmat = false;
bool update = false;
bool force_float = false;

string ext = extract_extension(destination);

for (int i=4 ; i < argc && argv[i] ; ++i) {
string curopt = removeblanks(argv[i]);
if (curopt == "")
Expand All @@ -716,6 +722,7 @@ int vmatmain(int argc, char** argv)
precision = toint(curopt.substr(12));
}
else if (curopt.substr(0,12) == "--delimiter=") {
PLCHECK(ext==".cvs");
delimiter = curopt.substr(12);
}
else if (curopt == "--convert-date")
Expand All @@ -726,7 +733,10 @@ int vmatmain(int argc, char** argv)
save_vmat = true;
else if (curopt == "--update")
update = true;
else
else if (curopt == "--force_float"){
PLCHECK(ext==".pmat");
force_float = true;
}else
PLWARNING("VMat convert: unrecognized option '%s'; ignoring it...",
curopt.c_str());
}
Expand All @@ -738,7 +748,6 @@ int vmatmain(int argc, char** argv)
if (columns.size() > 0)
vm = new SelectColumnsVMatrix(vm, columns);

string ext = extract_extension(destination);
if (ext != ".csv" && skip_missings)
PLWARNING("Option '--skip-missings' not supported for extension '%s'; ignoring it...",
ext.c_str());
Expand All @@ -750,7 +759,7 @@ int vmatmain(int argc, char** argv)
// Save strings as strings so they are not lost.
vm->saveAMAT(destination, true, false, true);
else if(ext==".pmat")
vm->savePMAT(destination);
vm->savePMAT(destination, force_float);
else if(ext==".dmat")
vm->saveDMAT(destination);
else if(ext == ".csv")
Expand Down
26 changes: 13 additions & 13 deletions plearn/vmat/FileVMatrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,12 @@ FileVMatrix::FileVMatrix(const PPath& filename, bool writable_):
build_();
}

FileVMatrix::FileVMatrix(const PPath& filename, int the_length, int the_width):
FileVMatrix::FileVMatrix(const PPath& filename, int the_length, int the_width,
bool force_float):
inherited (the_length, the_width, true),
filename_ (filename.absolute()),
f (0),
force_float (force_float),
build_new_file (true)
{
remove_when_done = track_ref = -1;
Expand Down Expand Up @@ -175,6 +177,8 @@ void FileVMatrix::build_()
#ifdef BIGENDIAN
file_is_bigendian = true;
#endif
if(force_float)
file_is_float = true;

updateHeader();

Expand Down Expand Up @@ -490,22 +494,18 @@ void FileVMatrix::flush()
//////////////////
void FileVMatrix::updateHeader() {
char header[DATAFILE_HEADERLENGTH];
#ifdef USEFLOAT
#ifdef LITTLEENDIAN
sprintf(header,"MATRIX %d %d FLOAT LITTLE_ENDIAN", length_, width_);
#endif
#ifdef BIGENDIAN
sprintf(header,"MATRIX %d %d FLOAT BIG_ENDIAN", length_, width_);
#endif
#endif
#ifdef USEDOUBLE
string real = "DOUBLE";
if(file_is_float)
real = "FLOAT";

#ifdef LITTLEENDIAN
sprintf(header,"MATRIX %d %d DOUBLE LITTLE_ENDIAN", length_, width_);
sprintf(header,"MATRIX %d %d %s LITTLE_ENDIAN", length_, width_, real.c_str());
#endif
#ifdef BIGENDIAN
sprintf(header,"MATRIX %d %d DOUBLE BIG_ENDIAN", length_, width_);
#endif
sprintf(header,"MATRIX %d %d %s BIG_ENDIAN", length_, width_, real.c_str());
#endif


int pos = strlen(header);
for(; pos<DATAFILE_HEADERLENGTH; pos++)
{
Expand Down
4 changes: 3 additions & 1 deletion plearn/vmat/FileVMatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class FileVMatrix: public RowBufferedVMatrix
#endif
bool file_is_bigendian;
bool file_is_float;
bool force_float;

private:

Expand All @@ -85,7 +86,8 @@ class FileVMatrix: public RowBufferedVMatrix

FileVMatrix();
FileVMatrix(const PPath& filename, bool writable_=false); //!< opens an existing file
FileVMatrix(const PPath& filename, int the_length, int the_width); //!< create a new matrix file
FileVMatrix(const PPath& filename, int the_length, int the_width,
bool force_float=false); //!< create a new matrix file
FileVMatrix(const PPath& filename, int the_length, const TVec<string>& fieldnames); //!< create a new matrix file

protected:
Expand Down
4 changes: 2 additions & 2 deletions plearn/vmat/VMatrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2019,7 +2019,7 @@ void VMatrix::save(const PPath& filename) const
//////////////
// savePMAT //
//////////////
void VMatrix::savePMAT(const PPath& pmatfile) const
void VMatrix::savePMAT(const PPath& pmatfile, const bool force_float) const
{
if (width() == -1)
PLERROR("In VMat::save - Saving in a pmat file is only possible for constant width VMats (where width()!=-1)");
Expand All @@ -2028,7 +2028,7 @@ void VMatrix::savePMAT(const PPath& pmatfile) const
PPath pmatfiletmp=pmatfile+".tmp";

{
FileVMatrix m(pmatfiletmp,nsamples,width());
FileVMatrix m(pmatfiletmp,nsamples,width(),force_float);
m.setMetaInfoFrom(this);
// m.setFieldInfos(getFieldInfos());
// m.copySizesFrom(this);
Expand Down
3 changes: 2 additions & 1 deletion plearn/vmat/VMatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,8 @@ class VMatrix: public Object
virtual void save(const PPath& filename) const;

/// Save the VMatrix in PMat format
virtual void savePMAT(const PPath& pmatfile) const;
virtual void savePMAT(const PPath& pmatfile,
const bool force_float=false) const;

/// Save the VMatrix in DMat format
virtual void saveDMAT(const PPath& dmatdir) const;
Expand Down

0 comments on commit f6afcd9

Please sign in to comment.