Skip to content

Commit

Permalink
Added options to set the size of s[0..n-1] (bytes) for integer alphabets
Browse files Browse the repository at this point in the history
  • Loading branch information
felipelouza committed Jun 4, 2018
1 parent f89c8d3 commit 97cf6d3
Show file tree
Hide file tree
Showing 16 changed files with 271 additions and 220 deletions.
3 changes: 2 additions & 1 deletion experiments/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ LIBOBJ = \
DEBUG = 0
STDERR = 0
M64 = 0
LARGE_ALPHABET = 0
##

DEFINES = -DDEBUG=$(DEBUG) -DSTDERR=$(STDERR) -DM64=$(M64)
DEFINES = -DDEBUG=$(DEBUG) -DSTDERR=$(STDERR) -DM64=$(M64) -DLARGE_ALPHABET=$(LARGE_ALPHABET)

CFLAGS += $(DEFINES)

Expand Down
14 changes: 8 additions & 6 deletions experiments/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ make
make run DIR=dataset INPUT=input-10000.txt K=10000 MODE=6
```

One can change to 64 bits integers (when n > 2^31) setting M64 to 1.
It is possible to switch to 64-bit integers (needed when n > 2^31) by setting M64 to 1.

```sh
make clean
Expand All @@ -31,6 +31,8 @@ MODE parameter specifies which algorithm is called by main.c:

SAIS\* and SACA-K\* are versions that receive an integer alphabet as input.

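The type of the array s[0..n-1] for integer alphabets (`int_text`) is defined in `lib/utils.h` and is selected at build time with the `LARGE_ALPHABET` option (together with `M64`).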

--

**SA and LCP:**
Expand All @@ -44,7 +46,7 @@ MODE parameter:
make run MODE=8
```

One can compute LCP after SA construction using Phi-algorithm [5]:
The LCP array can be computed after SA construction using the Phi algorithm [5]:

```sh
make run MODE=6 LCP_COMPUTE=1
Expand All @@ -63,7 +65,7 @@ MODE parameter:
make run MODE=10
```

One can compute DA after SA construction using a variation of Algorithm 7.30 from Ohlebusch's book [6, page 347]:
DA can be computed after SA construction using a variation of Algorithm 7.30 from Ohlebusch's book [6, page 347]:

```sh
make run MODE=6 DA_COMPUTE=1
Expand Down Expand Up @@ -97,23 +99,23 @@ make run MODE=12

**Validate:**

One can check if the output produced is correct:
It is possible to check if the output produced is correct:

```sh
make run CHECK=1
```

**Output:**

One can output SA (LCP and DA) as $DIR$INPUT.sa (.lcp and .da):
SA (and likewise LCP and DA) can be written to $DIR$INPUT.sa (.lcp and .da, respectively):

```sh
make run OUTPUT=1
```

**Compare:**

One can compare all algorithms:
All algorithms can be compared:

```sh
for i in {1..10}; do make run DIR=dataset INPUT=input-10000.txt K=10000 LCP_COMPUTE=1 DA_COMPUTE=1 MODE=$i; done
Expand Down
10 changes: 5 additions & 5 deletions experiments/lib/document_array.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "document_array.h"

#define chr(i) (cs==sizeof(int_t)?((int_t*)T)[i]:((unsigned char *)T)[i])
#define chr(i) (cs==sizeof(int_text)?((int_text*)T)[i]:((unsigned char *)T)[i])

/*******************************************************************/
int check_LF_array(int_t* SA, int_t* LF, uint_t n, unsigned char separator){
Expand Down Expand Up @@ -97,7 +97,7 @@ return 0;
}

/*******************************************************************/
int compute_LF_array_int(int_t* T, int_t* SA, int_t* LF, uint_t n, unsigned int SIGMA, int cs, uint_t k){
int compute_LF_array_int(int_text* T, int_t* SA, int_t* LF, uint_t n, unsigned int SIGMA, int cs, uint_t k){

uint_t i, sum=0;
int_t c;
Expand Down Expand Up @@ -125,7 +125,7 @@ return 0;

/*******************************************************************/

int document_array_LF_int(int_t* T, int_t* SA, int_t* DA, uint_t n, unsigned int SIGMA, int cs, unsigned char separator, uint_t k){
int document_array_LF_int(int_text* T, int_t* SA, int_t* DA, uint_t n, unsigned int SIGMA, int cs, unsigned char separator, uint_t k){

uint_t tmp, i, j, count=k;
compute_LF_array_int(T, SA, DA, n, SIGMA, cs, k);
Expand Down Expand Up @@ -179,7 +179,7 @@ return 1;

/*******************************************************************/

int document_array_check_int(int_t *T, int_t *SA, int_t *DA, uint_t n, int cs, uint_t k){
int document_array_check_int(int_text *T, int_t *SA, int_t *DA, uint_t n, int cs, uint_t k){

uint_t i, count=k;
int_t* R = (int_t*) malloc(n*sizeof(int_t));
Expand Down Expand Up @@ -212,7 +212,7 @@ int document_array_print(unsigned char *T, int_t *SA, int_t *DA, size_t n, int c

int_t j=SA[i];
for(j=SA[i]; (j<SA[i]+10); j++)
printf("%" PRIdN " ", chr(j));
printf("%" PRIdT " ", chr(j));
printf("\n");
}

Expand Down
4 changes: 2 additions & 2 deletions experiments/lib/document_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@
#endif

int document_array_LF(unsigned char* T, int_t* SA, int_t* DA, uint_t n, unsigned int SIGMA, int cs, unsigned char separator, uint_t k);
int document_array_LF_int(int_t* T, int_t* SA, int_t* DA, uint_t n, unsigned int SIGMA, int cs, unsigned char separator, uint_t k);
int document_array_LF_int(int_text* T, int_t* SA, int_t* DA, uint_t n, unsigned int SIGMA, int cs, unsigned char separator, uint_t k);

/**/

int document_array_check(unsigned char *T, int_t *SA, int_t *DA, uint_t n, int cs, unsigned char separator, uint_t k);

int document_array_check_int(int_t *T, int_t *SA, int_t *DA, uint_t n, int cs, uint_t k);
int document_array_check_int(int_text *T, int_t *SA, int_t *DA, uint_t n, int cs, uint_t k);

int document_array_print(unsigned char *T, int_t *SA, int_t *DA, size_t n, int cs);
int document_array_write(int_t *DA, int_t n, char* c_file, const char* ext);
Expand Down
16 changes: 8 additions & 8 deletions experiments/lib/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,15 +265,15 @@ int file_text_write(unsigned char *str, int_t n, char* c_file, const char* ext){
return 1;
}

int file_text_int_write(int_t *str, int_t n, char* c_file, const char* ext){
int file_text_int_write(int_text *str, int_t n, char* c_file, const char* ext){

FILE *f_out;
char *c_out = malloc((strlen(c_file)+strlen(ext))*sizeof(char));

sprintf(c_out, "%s.%s", c_file, ext);
f_out = file_open(c_out, "wb");

fwrite(str, sizeof(int_t), n, f_out);
fwrite(str, sizeof(int_text), n, f_out);

file_close(f_out);
free(c_out);
Expand Down Expand Up @@ -306,7 +306,7 @@ int_t file_text_read(unsigned char** str, char* c_file, const char* ext){
return n;
}

int_t file_text_int_read(int_t** str_int, char* c_file, const char* ext){
int_t file_text_int_read(int_text** str_int, char* c_file, const char* ext){

FILE *f_in;
char *c_in = malloc((strlen(c_file)+strlen(ext))*sizeof(char));
Expand All @@ -320,8 +320,8 @@ int_t file_text_int_read(int_t** str_int, char* c_file, const char* ext){

int_t n = size/sizeof(int_t);

*str_int = (int_t*) malloc(n*sizeof(int_t));
fread(*str_int, sizeof(int_t), n, f_in);
*str_int = (int_text*) malloc(n*sizeof(int_text));
fread(*str_int, sizeof(int_text), n, f_in);

file_close(f_in);
free(c_in);
Expand Down Expand Up @@ -352,7 +352,7 @@ int file_bwt_write(unsigned char *str, int_t *SA, int_t n, char* c_file, const c
return 1;
}

int file_bwt_int_write(int_t *str, int_t *SA, int_t n, char* c_file, const char* ext){
int file_bwt_int_write(int_text *str_int, int_t *SA, int_t n, char* c_file, const char* ext){

FILE *f_out;
char *c_out = malloc((strlen(c_file)+strlen(ext))*sizeof(char));
Expand All @@ -362,8 +362,8 @@ int file_bwt_int_write(int_t *str, int_t *SA, int_t n, char* c_file, const char*

int i;
for(i=0; i<n;i++){
int_t j = (SA[i])? str[SA[i]-1]:0;
fwrite(&j, sizeof(int_t), 1, f_out);
int_t j = (SA[i])? str_int[SA[i]-1]:0;
fwrite(&j, sizeof(int_text), 1, f_out);
}

file_close(f_out);
Expand Down
6 changes: 3 additions & 3 deletions experiments/lib/file.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ char* file_load(FILE* f_in) ;
char** file_load_multiple(char* c_file, int k, int_t* n) ;

int file_text_write(unsigned char *str, int_t n, char* c_file, const char* ext);
int file_text_int_write(int_t *str_int, int_t n, char* c_file, const char* ext);
int file_text_int_write(int_text *str_int, int_t n, char* c_file, const char* ext);

int_t file_text_read(unsigned char** str, char* c_file, const char* ext);
int_t file_text_int_read(int_t** str, char* c_file, const char* ext);
int_t file_text_int_read(int_text** str, char* c_file, const char* ext);

int file_bwt_write(unsigned char *str, int_t *SA, int_t n, char* c_file, const char* ext);
int file_bwt_int_write(int_t *str_int, int_t *SA, int_t n, char* c_file, const char* ext);
int file_bwt_int_write(int_text *str_int, int_t *SA, int_t n, char* c_file, const char* ext);

/*******************************************************************/

Expand Down
14 changes: 10 additions & 4 deletions experiments/lib/lcp_array.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "lcp_array.h"

#define chr(i) (cs==sizeof(int_t)?((int_t*)T)[i]:((unsigned char *)T)[i])
#define chr(i) (cs==sizeof(int_text)?((int_text*)T)[i]:((unsigned char *)T)[i])

/*******************************************************************/

Expand Down Expand Up @@ -80,7 +80,7 @@ return 0;

/*******************************************************************/

int lcp_PHI_int(int_t* T, int_t* SA, int_t* LCP, uint_t n, int cs){
int lcp_PHI_int(int_text* T, int_t* SA, int_t* LCP, uint_t n, int cs){

uint_t* PLCP = (uint_t*) malloc(n * sizeof(uint_t));;

Expand Down Expand Up @@ -190,7 +190,13 @@ int lcp_array_check_lcp(unsigned char *T, int_t *SA, int_t *LCP, uint_t n, int c
clock_t c_start=0;

time_start(&t_start, &c_start);
lcp_PHI(T, SA, LCP2, n, sizeof(char), 1);//separator=1

if(cs==sizeof(char))
lcp_PHI(T, SA, LCP2, n, sizeof(char), 1);//separator=1
else
lcp_PHI_int((int_text*)T, SA, LCP2, n, cs);


fprintf(stderr,"%.6lf\n", time_stop(t_start, c_start));

for(i=0; i<n; i++){
Expand Down Expand Up @@ -225,7 +231,7 @@ int lcp_array_print(unsigned char *T, int_t *SA, int_t *LCP, size_t n, int cs){

int_t j=SA[i];
for(j=SA[i]; (j<SA[i]+min(10,LCP[i]+10)); j++)
printf("%" PRIdN " ", chr(j));
printf("%" PRIdT " ", chr(j));
printf("\n");
}

Expand Down
2 changes: 1 addition & 1 deletion experiments/lib/lcp_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ int lcp_kasai(char* T, int_t* SA, uint_t n, int_t* LCP);
* @param n The length of the given string.
*/
int lcp_PHI(unsigned char* T, int_t* SA, int_t* LCP, uint_t n, int cs, unsigned char separator);
int lcp_PHI_int(int_t* T, int_t* SA, int_t* LCP, uint_t n, int cs);
int lcp_PHI_int(int_text* T, int_t* SA, int_t* LCP, uint_t n, int cs);

int lcp_array_check(unsigned char *T, int_t *SA, int_t *LCP, uint_t n, int cs, unsigned char separator);
int lcp_array_check_phi(unsigned char *T, int_t *SA, int_t *LCP, uint_t n, int cs, unsigned char separator);
Expand Down
6 changes: 3 additions & 3 deletions experiments/lib/suffix_array.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "suffix_array.h"

#define chr(i) (cs==sizeof(int_t)?((int_t*)T)[i]:((unsigned char *)T)[i])
#define chr(i) (cs==sizeof(int_text)?((int_text*)T)[i]:((unsigned char *)T)[i])

/*******************************************************************/

Expand Down Expand Up @@ -56,7 +56,7 @@ int_t i;

int_t j=SA[i];
for(j=SA[i]; (j<SA[i]+5); j++)
printf("%" PRIdN " ", chr(j));
printf("%" PRIdT " ", chr(j));
printf("\n");
}

Expand Down Expand Up @@ -101,7 +101,7 @@ int_t suffix_array_check(unsigned char *T, int_t *SA, size_t n, int cs, unsigne
printf("#%" PRIdN ") %" PRIdN ", %" PRIdN "&\n", i, SA[i], SA[i+1]);

for(j=SA[i], k=SA[i+1]; (j<SA[i]+5); j++, k++)
printf("%" PRIdN " | %" PRIdN "\n", chr(j), chr(k));
printf("%" PRIdT " | %" PRIdT "\n", chr(j), chr(k));
printf("\n");

return 0;
Expand Down
6 changes: 3 additions & 3 deletions experiments/lib/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,13 @@ int_t min_range(int_t* A, int_t l, int_t r){
return min;
}
/*******************************************************************/
int_t* cat_int(unsigned char** R, int k, int_t *n){
int_text* cat_int(unsigned char** R, int k, int_t *n){

(*n)++; //add 0 at the end

int_t i, j;
int_t l=0;
int_t *str_int = (int_t*) malloc((*n)*sizeof(int_t));
int_text *str_int = (int_text*) malloc((*n)*sizeof(int_text));

for(i=0; i<k; i++){
int_t m = strlen((char*)R[i]);
Expand All @@ -106,7 +106,7 @@ int_t* cat_int(unsigned char** R, int k, int_t *n){

str_int[l++]=0;
if(*n>l){
str_int = (int_t*) realloc(str_int, (l)*sizeof(int_t));
str_int = (int_text*) realloc(str_int, (l)*sizeof(int_text));
printf("N = %" PRIdN "\n", l);
}
*n = l;
Expand Down
38 changes: 25 additions & 13 deletions experiments/lib/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,31 @@
#endif

#if M64
typedef int64_t int_t;
typedef uint64_t uint_t;
#define PRIdN PRId64
#define U_MAX UINT64_MAX
#define I_MAX INT64_MAX
#define I_MIN INT64_MIN
typedef int64_t int_t;
typedef uint64_t uint_t;
#define PRIdN PRId64
#define U_MAX UINT64_MAX
#define I_MAX INT64_MAX
#define I_MIN INT64_MIN
#else
typedef int32_t int_t;
typedef uint32_t uint_t;
#define PRIdN PRId32
#define U_MAX UINT32_MAX
#define I_MAX INT32_MAX
#define I_MIN INT32_MIN
typedef int32_t int_t;
typedef uint32_t uint_t;
#define PRIdN PRId32
#define U_MAX UINT32_MAX
#define I_MAX INT32_MAX
#define I_MIN INT32_MIN
#endif

/* LARGE_ALPHABET defaults to 0 when the build does not define it
 * (the Makefile passes it as -DLARGE_ALPHABET=$(LARGE_ALPHABET)). */
#ifndef LARGE_ALPHABET
#define LARGE_ALPHABET 0
#endif

/*
 * int_text: element type of the integer-alphabet text s[0..n-1].
 * PRIdT:    printf conversion macro matching int_text.
 *
 * The 64-bit type is chosen only when BOTH M64 and LARGE_ALPHABET are
 * non-zero; every other combination (including LARGE_ALPHABET=1 with
 * M64=0) falls through to int16_t.
 *
 * NOTE(review): with int16_t, symbol values above INT16_MAX cannot be
 * represented -- confirm the input alphabet fits before using this mode.
 */
#if M64 && LARGE_ALPHABET //8N bytes for s[0..n-1]
typedef int64_t int_text;
#define PRIdT PRId64
#else //2N bytes for s[0..n-1]
typedef int16_t int_text;
#define PRIdT PRId16
#endif

/**********************************************************************/
Expand All @@ -59,7 +71,7 @@ int_t min_range(int_t* A, int_t l, int_t r);

/**********************************************************************/

int_t* cat_int(unsigned char** R, int k, int_t *n);
int_text* cat_int(unsigned char** R, int k, int_t *n);
unsigned char* cat_char(unsigned char** R, int k, int_t *n);

double log2(double i);
Expand Down
Loading

0 comments on commit 97cf6d3

Please sign in to comment.