
Commit 2b338e0

Author: Ashkan Vedadi Gargary
Commit message: modify scripts
1 parent 2c8e1b0 · commit 2b338e0

18 files changed (+88, -201 lines)

.gitignore

Lines changed: 3 additions & 1 deletion
@@ -1,3 +1,5 @@
 # Ignore all .exe files
 *.exe
-*.out
+*.out
+*.json
+*.jsonl

paper_reproduced/related_works/cuDF/bestbuy.py

Lines changed: 5 additions & 5 deletions
@@ -156,11 +156,11 @@ def measure_parsing_time(json_file_path, description, query_version):
 # json_file_path_twitter = '/rhome/aveda002/bigdata/Test-Files/twitter_small_records_remove.json'
 # json_file_path_google = '/rhome/aveda002/bigdata/Test-Files/google_map_small_records_remove.json'
 # json_file_path_bestbuy = '/rhome/aveda002/bigdata/Test-Files/bestbuy_small_records_remove.json'
-json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
-json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
-json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
-json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
-json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
+# json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
+# json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
+# json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
+# json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
+# json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
 json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'

 # Measure and print the parsing times
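All six cuDF scripts touched by this commit follow the same convention: exactly one `json_file_path_*` assignment is left active, so each script benchmarks a single dataset via `measure_parsing_time`. The body of that function is outside this diff; the following is only a minimal sketch of what such a harness plausibly looks like, assuming the inputs are JSON Lines files and that cuDF is installed with a working GPU:

```python
# Hypothetical sketch only -- the real measure_parsing_time body is not
# shown in this commit. Assumes JSON Lines input and an available GPU.
import time
import cudf

def measure_parsing_time(json_file_path, description, query_version):
    start = time.perf_counter_ns()
    df = cudf.read_json(json_file_path, lines=True)  # parse on the GPU
    elapsed = time.perf_counter_ns() - start
    print(f"[{description}] parsed {len(df)} records in {elapsed} ns "
          f"(query version {query_version})")
    return df

measure_parsing_time('../../../dataset/bestbuy_small_records_remove.json',
                     'bestbuy', 0)
```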

paper_reproduced/related_works/cuDF/google.py

Lines changed: 5 additions & 5 deletions
@@ -159,12 +159,12 @@ def measure_parsing_time(json_file_path, description, query_version):
 # json_file_path_google = '/rhome/aveda002/bigdata/Test-Files/google_map_small_records_remove.json'
 # json_file_path_bestbuy = '/rhome/aveda002/bigdata/Test-Files/bestbuy_small_records_remove.json'

-json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
-json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
-json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
-json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
+# json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
+# json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
+# json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
+# json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
 json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
-json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'
+# json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'


 # Measure and print the parsing times

paper_reproduced/related_works/cuDF/nspl.py

Lines changed: 5 additions & 5 deletions
@@ -158,11 +158,11 @@ def measure_parsing_time(json_file_path, description, query_version):
 # json_file_path_bestbuy = '/rhome/aveda002/bigdata/Test-Files/bestbuy_small_records_remove.json'

 json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
-json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
-json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
-json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
-json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
-json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'
+# json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
+# json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
+# json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
+# json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
+# json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'

 # Measure and print the parsing times
 measure_parsing_time(json_file_path_wiki, "nspl", 0)

paper_reproduced/related_works/cuDF/twitter.py

Lines changed: 7 additions & 11 deletions
@@ -144,24 +144,20 @@ def measure_parsing_time(json_file_path, description, query_version):
     print(f"Time taken to execute query: {query_time} ns\n")


-
-
-
-
-
 # Paths to JSON files
 # json_file_path_nspl = '/rhome/aveda002/bigdata/Test-Files/wiki_small_records_remove.json'
 # json_file_path_wiki = '/rhome/aveda002/bigdata/Test-Files/wiki_small_records_remove.json'
 # json_file_path_walmart = '/rhome/aveda002/bigdata/Test-Files/walmart_small_records_remove.json'
 # json_file_path_twitter = '/rhome/aveda002/bigdata/Test-Files/twitter_small_records_remove.json'
 # json_file_path_google = '/rhome/aveda002/bigdata/Test-Files/google_map_small_records_remove.json'
 # json_file_path_bestbuy = '/rhome/aveda002/bigdata/Test-Files/bestbuy_small_records_remove.json'
-json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
-json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
-json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
-json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
-json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
-json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'
+# json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
+# json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
+# json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
+json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
+# json_file_path_twitter = '../../../dataset/twitter_sample_small_records.json' # /rhome/aveda002/bigdata/cuJSON/dataset/twitter_sample_large_record.json
+# json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
+# json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'


paper_reproduced/related_works/cuDF/walmart.py

Lines changed: 5 additions & 5 deletions
@@ -158,12 +158,12 @@ def measure_parsing_time(json_file_path, description, query_version):
 # json_file_path_google = '/rhome/aveda002/bigdata/Test-Files/google_map_small_records_remove.json'
 # json_file_path_bestbuy = '/rhome/aveda002/bigdata/Test-Files/bestbuy_small_records_remove.json'

-json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
-json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
+# json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
+# json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
 json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
-json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
-json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
-json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'
+# json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
+# json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
+# json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'


 # Measure and print the parsing times

paper_reproduced/related_works/cuDF/wiki.py

Lines changed: 5 additions & 5 deletions
@@ -155,12 +155,12 @@ def measure_parsing_time(json_file_path, description, query_version):
 # json_file_path_google = '/rhome/aveda002/bigdata/Test-Files/google_map_small_records_remove.json'
 # json_file_path_bestbuy = '/rhome/aveda002/bigdata/Test-Files/bestbuy_small_records_remove.json'

-json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
+# json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
 json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
-json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
-json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
-json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
-json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'
+# json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
+# json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
+# json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
+# json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'


 # Measure and print the parsing times

paper_reproduced/scripts/README.md

Lines changed: 27 additions & 27 deletions
@@ -3,26 +3,26 @@

 All runs were executed 10 times to calculate the average results for each method.

-## 📊 Fig 9 and Fig 10 (cuJSON vs CPU Methods)
+## 📊 Fig 9 and Fig 10 (cujson vs CPU Methods)

 To run each method separately and get the results for each dataset (executed 10 times), use the following commands:

-- **cuJSON**
+- **cujson**
 ```bash
 ./run_cujson_fig9.sh
 ```

-- **simdJSON**
+- **simdjson**
 ```bash
 ./run_simdjson_fig9.sh
 ```

-- **RapidJSON**
+- **rapidjson**
 ```bash
 ./run_rapidjson_fig9.sh
 ```

-- **Pison**
+- **pison**
 ```bash
 ./run_pison_fig9.sh
 ```
@@ -43,18 +43,18 @@ run_all_fig9.sh

 ---

-## 📈 Fig 11 (cuJSON vs GPU Methods)
+## 📈 Fig 11 (cujson vs GPU Methods)

 To run each method separately for Fig 11:

-- **GPJSON**
+- **gpjson**
 ```bash
 ./run_gpjson_fig11.sh
 ```

-- **cuDF**
+- **cudf**
 ```bash
-./run_cuDF_fig11.sh
+./run_cudf_fig11.sh
 ```

 If you'd like to run both methods together, use the following:
@@ -76,17 +76,17 @@ run_all_fig11.sh

 To report peak GPU memory usage for each method:

-- **cuJSON**
+- **cujson**
 ```bash
 ./run_cujson_fig12.sh
 ```

-- **GPJSON**
+- **gpjson**
 ```bash
 ./run_gpjson_fig12.sh
 ```

-- **cuDF**
+- **cudf**
 ```bash
 ./run_cudf_fig12.sh
 ```
@@ -108,9 +108,9 @@ To run all methods together:

 ---

-## ⏱️ Fig 13 and Table 8 (cuJSON only)
+## ⏱️ Fig 13 and Table 8 (cujson only)

-For time-breakdown reporting with **cuJSON**, execute the following:
+For time-breakdown reporting with **cujson**, execute the following:

 ```bash
 ./run_cujson_fig13.sh
@@ -129,32 +129,32 @@ For time-breakdown reporting with **cuJSON**, execute the following:

 To run each method separately and report memory usage:

-- **cuJSON**
+- **cujson**
 ```bash
 ./run_cujson_fig14.sh
 ```

-- **simdJSON**
+- **simdjson**
 ```bash
 ./run_simdjson_fig14.sh
 ```

-- **RapidJSON**
+- **rapidjson**
 ```bash
 ./run_rapidjson_fig14.sh
 ```

-- **Pison**
+- **pison**
 ```bash
 ./run_pison_fig14.sh
 ```

-- **cuDF/MetaJSON**
+- **cudf/MetaJSON**
 ```bash
 ./run_cudf_fig14.sh
 ```

-> **Note**: To compute results for **GPJSON**, the library requires modifications. This library lead to no output and it required to modify the source code after installation. However, Theoritcally, the best possible results of `GPJSON` will be as same as `pison`, but by modification we realize it is even worst than `pison`.
+> **Note**: To compute results for **gpjson**, the library requires modification: out of the box it produces no output, so the source code must be edited after installation. In theory, the best possible results for `gpjson` would match `pison`, but after our modifications we found it performs even worse than `pison`.

 To run all methods together:

@@ -179,7 +179,7 @@ To run all methods together:

 To report the average time for running the queries, execute the scripts for each method:

-- **cuJSON**
+- **cujson**
 ```bash
 ./run_cujson_fig15.sh
 ```
@@ -189,12 +189,12 @@ To report the average time for running the queries, execute the scripts for each
 ./run_simdjson_fig15.sh
 ```

-- **Pison**
+- **pison**
 ```bash
 ./run_pison_fig15.sh
 ```

-- **RapidJSON**
+- **rapidjson**
 ```bash
 ./run_rapidjson_fig15.sh
 ```
@@ -219,14 +219,14 @@ To run all methods together:

 ### Middle and Right: Modifying JSON Files

-For these sections, you will need to modify the JSON file to contain only one record to compute the query time. Library modifications are required for **GPJSON** and **cuDF**, so ensure you install the libraries and use them accordingly.
+For these sections, you will need to modify the JSON file to contain only one record to compute the query time. Library modifications are required for **gpjson** and **cudf**, so ensure you install the libraries and use them accordingly.


 ---

-## 🧑‍💻 Fig 16: cuJSON Scalability
+## 🧑‍💻 Fig 16: cujson Scalability

-To run **cuJSON** scalability tests:
+To run **cujson** scalability tests:

 1. First, download the scalability data.
 2. Then, execute the following:
@@ -244,7 +244,7 @@ To run **cuJSON** scalability tests:
 ## General Notes:

 - All scripts assume you have the necessary dependencies installed.
-- For `GPJSON` after you install the required library files, you have to add your keys in the scripts: `scripts/run_gpjson_fig11.sh` and `scripts/run_gpjson_fig12.sh`.
+- For `gpjson`, after you install the required library files, add your keys in the scripts `scripts/run_gpjson_fig11.sh` and `scripts/run_gpjson_fig12.sh`.
 - The `scripts/results` folder will contain all output files, categorized by method, along with CSV summaries.
 - For specific modifications or troubleshooting, refer to the individual script files or the README of each related work for more details.
 - Make sure to download all the datasets from https://drive.google.com/drive/folders/1PkDEy0zWOkVREfL7VuINI-m9wJe45P2Q?usp=sharing and place them into the `dataset` folder. The `scalability` folder must be placed exactly as it appears in the `dataset` folder for the experiments to run properly.
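The `run_*` scripts referenced in this README are not part of this commit, but the repeat-and-average protocol it describes ("executed 10 times") reduces to a simple loop. A minimal bash sketch of that pattern, with a hypothetical benchmark command standing in for the real one:

```bash
# Hypothetical sketch of the 10-run averaging protocol described above.
# ./benchmark_binary is a stand-in; the real commands live in the run_* scripts.
total=0
for i in $(seq 1 10); do
    t=$(/usr/bin/time -f "%e" ./benchmark_binary ../dataset/bestbuy_small_records_remove.json 2>&1 >/dev/null)
    total=$(echo "$total + $t" | bc)
done
echo "average over 10 runs: $(echo "scale=3; $total / 10" | bc) s"
```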

paper_reproduced/scripts/figure_generator.sh

Lines changed: 3 additions & 17 deletions
@@ -1,22 +1,8 @@
-#!/bin/bash -l
-
-#SBATCH --nodes=1
-#SBATCH --ntasks=1
-#SBATCH --cpus-per-task=4
-#SBATCH --output="result-figure-generation.log"
-#SBATCH --mem=8G
-#SBATCH -p short_gpu
-#SBATCH --gres=gpu:ada6000:1
-#SBATCH --time=01:00:00
-
-# -------------------------------
-# Step 1: Load necessary modules
-# -------------------------------
-module load slurm
-module load cuda/11.8
+#!/bin/bash
+set -e

 # -------------------------------
-# Step 3: Run the external Python script to generate the figure
+# Run the external Python script to generate the figure
 # -------------------------------
 python plot_fig9.py
 python plot_fig11.py
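With the SLURM directives removed, the generator presumably no longer needs a scheduler and can be invoked directly on any machine where the `plot_fig*.py` scripts and their Python dependencies are available:

```bash
# Run from the scripts directory; assumes the results CSVs have already
# been produced by the run_* scripts above.
cd paper_reproduced/scripts
bash figure_generator.sh
```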
