diff --git a/README.md b/README.md index 84e0b27..bd166d3 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ # gsa-is -gSAIS and gSACA-K are suffix array construction algorithms for string collections. +gSAIS and gSACA-K are suffix array (SA) construction algorithms for string collections. ## Introduction gSAIS and gSACA-K [1, 2] extend the linear-time suffix sorting algorithms SAIS -[3] and SACA-K [4] to compute the suffix array for a string collection, +[3] and SACA-K [4] to compute SA for string collections, maintaining their theoretical bounds and improving their practical performance. Moreover, gSAIS and gSACA-K can also compute the LCP-array (LCP) and the @@ -28,7 +28,7 @@ An ANSI C Compiler (e.g. GNU GCC) /** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1] * * @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0 - * @param SA suffix array + * @param SA Suffix array * @param LCP LCP array * @param DA Document array * @param n string length @@ -39,8 +39,8 @@ int gsacak(unsigned char *s, uint_t *SA, int_t *LCP, int_t *DA, uint_t n); /** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1] * - * @param s input concatenated string, with s[n-1]=0 - * @param SA suffix array + * @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0 + * @param SA Suffix array * @param LCP LCP array * @param DA Document array * @param n string length @@ -151,4 +151,4 @@ Please, if you use this tool in an academic setting cite the following paper: ## Thanks -Thanks to [Giovanni Manzini](https://github.com/giovmanz) and [Giovanna Rosone](https://github.com/giovannarosone) by helpful suggestions and debugging. +Thanks to [Giovanni Manzini](https://github.com/giovmanz), [Giovanna Rosone](https://github.com/giovannarosone) and René de R. Sacramento for helpful suggestions and debugging. 
diff --git a/gsacak.h b/gsacak.h index f73f77d..5287031 100644 --- a/gsacak.h +++ b/gsacak.h @@ -1,3 +1,5 @@ +// vim: noai:ts=2:sw=2 + /* * Authors: Felipe A. Louza, Simon Gog, Guilherme P. Telles * contact: louza@ic.unicamp.br @@ -92,10 +94,10 @@ int sacak_int(int_text *s, uint_t *SA, uint_t n, uint_t k); /** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1] * * @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0 - * @param SA suffix array + * @param SA Suffix array * @param LCP LCP array * @param DA Document array - * @param n string length + * @param n String length * * @return depth of the recursive calls. */ @@ -103,8 +105,14 @@ int gsacak(unsigned char *s, uint_t *SA, int_t *LCP, int_da *DA, uint_t n); /** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1] * - * @param s input concatenated string, with s[n-1]=0 - * @param K alphabet size + * @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0 + * @param SA Suffix array + * @param LCP LCP array + * @param DA Document array + * @param n String length + * @param K Alphabet size + * + * @return depth of the recursive calls. */ int gsacak_int(int_text *s, uint_t *SA, int_t *LCP, int_da *DA, uint_t n, uint_t k); diff --git a/gsais.h b/gsais.h index 667ccd5..480cede 100644 --- a/gsais.h +++ b/gsais.h @@ -1,3 +1,5 @@ +// vim: noai:ts=2:sw=2 + /* * Authors: Felipe A. Louza, Simon Gog, Guilherme P. Telles * contact: louza@ic.unicamp.br @@ -75,9 +77,9 @@ typedef uint_t int_da; /** @brief computes the suffix array of string s[0..n-1] * * @param s input string with s[n-1]=0 - * @param SA suffix array + * @param SA suffix array * @param n string length - * @return -1 if an error occured, otherwise the depth of the recursive calls. + * @return -1 if an error occurred, otherwise the depth of the recursive calls. 
*/ int sais(unsigned char *s, uint_t *SA, uint_t n); @@ -91,20 +93,27 @@ int sais_int(int_text *s, uint_t *SA, uint_t n, uint_t k); /** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1] * - * @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0 - * @param SA suffix array - * @param LCP LCP array - * @param DA Document array - * @param n string length - * + * @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0 + * @param SA Suffix array + * @param LCP LCP array + * @param DA Document array + * @param n String length + * @param K alphabet size + * * @return depth of the recursive calls. */ int gsais(unsigned char *s, uint_t *SA, int_t *LCP, int_da *DA, uint_t n); /** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1] * - * @param s input concatenated string, with s[n-1]=0 - * @param K alphabet size + * @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0 + * @param SA Suffix array + * @param LCP LCP array + * @param DA Document array + * @param n String length + * @param K Alphabet size + * + * @return depth of the recursive calls. 
*/ int gsais_int(int_text *s, uint_t *SA, int_t *LCP, int_da *DA, uint_t n, uint_t k); diff --git a/test.c b/test.c index 5e83de5..72e7be8 100644 --- a/test.c +++ b/test.c @@ -5,15 +5,21 @@ #include "gsacak.h" #include "experiments/external/malloc_count/malloc_count.h" //memory counter +#ifndef DEBUG + #define DEBUG 0 +#endif + int main(int argc, char *argv[]){ - printf("sizeof(int_t) = %zu bytes\n", sizeof(int_t)); + #if DEBUG + printf("sizeof(int_t) = %zu bytes\n", sizeof(int_t)); + #endif + unsigned char *Text; uint_t n=0; // intput data if(argc>=2){ - //concatenate all strings s_1, s_2, .., s_d in s_1$s_2$..%s_d$# int i = 2, sum=0; for(; i<= argc; i++){ @@ -31,33 +37,18 @@ int main(int argc, char *argv[]){ printf("N = %d\n", n); } else{ - unsigned char *S[3] = {"banana", "anaba", "anan"}; - //unsigned char *S[3] = {"a", "", "c"}; - int i, sum=0; - for(i=0; i< 3; i++){ - sum += strlen((S[i]))+1; - } - n = sum+1; - Text = malloc(n*sizeof(unsigned char)); - sum=0; - for(i=0; i< 3; i++){ - sscanf(S[i], "%s", &Text[sum]); - sum += strlen((S[i]))+1; - Text[sum-1]=1;//separator - } - Text[n-1]=0; - printf("N = %d\n", n); + fprintf(stderr, "Please, insert at least one string.\n"); + exit(-1); } int i, j; - printf("Text = "); + printf("T^{cat} = "); for(i=0;i