Skip to content

Commit

Permalink
Update README.md
Browse files Browse the repository at this point in the history
  • Loading branch information
felipelouza committed Aug 17, 2018
1 parent d86fd08 commit 0c2c8f5
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 41 deletions.
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# gsa-is

gSAIS and gSACA-K are suffix array construction algorithms for string collections.
gSAIS and gSACA-K are suffix array (SA) construction algorithms for string collections.

## Introduction

gSAIS and gSACA-K [1, 2] extend the linear-time suffix sorting algorithms SAIS
[3] and SACA-K [4] to compute the suffix array for a string collection,
[3] and SACA-K [4] to compute SA for string collections,
maintaining their theoretical bounds and improving their practical performance.

Moreover, gSAIS and gSACA-K can also compute the LCP-array (LCP) and the
Expand All @@ -28,7 +28,7 @@ An ANSI C Compiler (e.g. GNU GCC)
/** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1]
*
* @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0
* @param SA suffix array
* @param SA Suffix array
* @param LCP LCP array
* @param DA Document array
* @param n string length
Expand All @@ -39,8 +39,8 @@ int gsacak(unsigned char *s, uint_t *SA, int_t *LCP, int_t *DA, uint_t n);

/** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1]
*
* @param s input concatenated string, with s[n-1]=0
* @param SA suffix array
* @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0
* @param SA Suffix array
* @param LCP LCP array
* @param DA Document array
* @param n string length
Expand Down Expand Up @@ -151,4 +151,4 @@ Please, if you use this tool in an academic setting cite the following paper:

## Thanks

Thanks to [Giovanni Manzini](https://github.com/giovmanz) and [Giovanna Rosone](https://github.com/giovannarosone) by helpful suggestions and debugging.
Thanks to [Giovanni Manzini](https://github.com/giovmanz), [Giovanna Rosone](https://github.com/giovannarosone) and René de R. Sacramento for helpful suggestions and debugging.
16 changes: 12 additions & 4 deletions gsacak.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// vim: noai:ts=2:sw=2

/*
* Authors: Felipe A. Louza, Simon Gog, Guilherme P. Telles
* contact: [email protected]
Expand Down Expand Up @@ -92,19 +94,25 @@ int sacak_int(int_text *s, uint_t *SA, uint_t n, uint_t k);
/** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1]
*
* @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0
* @param SA suffix array
* @param SA Suffix array
* @param LCP LCP array
* @param DA Document array
* @param n string length
* @param n String length
*
* @return depth of the recursive calls.
*/
int gsacak(unsigned char *s, uint_t *SA, int_t *LCP, int_da *DA, uint_t n);

/** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1]
*
* @param s input concatenated string, with s[n-1]=0
* @param K alphabet size
* @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0
* @param SA Suffix array
* @param LCP LCP array
* @param DA Document array
* @param n String length
* @param k Alphabet size
*
* @return depth of the recursive calls.
*/
int gsacak_int(int_text *s, uint_t *SA, int_t *LCP, int_da *DA, uint_t n, uint_t k);

Expand Down
29 changes: 19 additions & 10 deletions gsais.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// vim: noai:ts=2:sw=2

/*
* Authors: Felipe A. Louza, Simon Gog, Guilherme P. Telles
* contact: [email protected]
Expand Down Expand Up @@ -75,9 +77,9 @@ typedef uint_t int_da;
/** @brief computes the suffix array of string s[0..n-1]
*
* @param s input string with s[n-1]=0
* @param SA suffix array
* @param SA suffix array
* @param n string length
* @return -1 if an error occured, otherwise the depth of the recursive calls.
* @return -1 if an error occurred, otherwise the depth of the recursive calls.
*/
int sais(unsigned char *s, uint_t *SA, uint_t n);

Expand All @@ -91,20 +93,27 @@ int sais_int(int_text *s, uint_t *SA, uint_t n, uint_t k);

/** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1]
*
* @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0
* @param SA suffix array
* @param LCP LCP array
* @param DA Document array
* @param n string length
*
* @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0
* @param SA Suffix array
* @param LCP LCP array
* @param DA Document array
* @param n String length
*
* @return depth of the recursive calls.
*/
int gsais(unsigned char *s, uint_t *SA, int_t *LCP, int_da *DA, uint_t n);

/** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1]
*
* @param s input concatenated string, with s[n-1]=0
* @param K alphabet size
* @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0
* @param SA Suffix array
* @param LCP LCP array
* @param DA Document array
* @param n String length
* @param k Alphabet size
*
* @return depth of the recursive calls.
*/
int gsais_int(int_text *s, uint_t *SA, int_t *LCP, int_da *DA, uint_t n, uint_t k);

Expand Down
32 changes: 11 additions & 21 deletions test.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,21 @@
#include "gsacak.h"
#include "experiments/external/malloc_count/malloc_count.h" //memory counter

#ifndef DEBUG
#define DEBUG 0
#endif

int main(int argc, char *argv[]){

printf("sizeof(int_t) = %zu bytes\n", sizeof(int_t));
#if DEBUG
printf("sizeof(int_t) = %zu bytes\n", sizeof(int_t));
#endif

unsigned char *Text;
uint_t n=0;

// input data
if(argc>=2){

//concatenate all strings s_1, s_2, .., s_d in s_1$s_2$..$s_d$#
int i = 2, sum=0;
for(; i<= argc; i++){
Expand All @@ -31,33 +37,18 @@ int main(int argc, char *argv[]){
printf("N = %d\n", n);
}
else{
unsigned char *S[3] = {"banana", "anaba", "anan"};
//unsigned char *S[3] = {"a", "", "c"};
int i, sum=0;
for(i=0; i< 3; i++){
sum += strlen((S[i]))+1;
}
n = sum+1;
Text = malloc(n*sizeof(unsigned char));
sum=0;
for(i=0; i< 3; i++){
sscanf(S[i], "%s", &Text[sum]);
sum += strlen((S[i]))+1;
Text[sum-1]=1;//separator
}
Text[n-1]=0;
printf("N = %d\n", n);
fprintf(stderr, "Please, insert at least one string.\n");
exit(-1);
}

int i, j;
printf("Text = ");
printf("T^{cat} = ");
for(i=0;i<n-1;i++){
if(Text[i]==1) printf("$");
else printf("%c", Text[i]);
}
printf("#\n");

/**/
// allocate
uint_t *SA = (uint_t *)malloc(n * sizeof(uint_t));
int_t *LCP = (int_t *)malloc(n * sizeof(int_t));
Expand All @@ -79,7 +70,6 @@ int main(int argc, char *argv[]){
printf("#\n");
}


// deallocate
free(SA);
free(DA);
Expand Down

0 comments on commit 0c2c8f5

Please sign in to comment.