
Commit 2b338e0

Author: Ashkan Vedadi Gargary
Commit message: modify scripts
1 parent 2c8e1b0 · commit 2b338e0

18 files changed (+88, -201 lines)

.gitignore

Lines changed: 3 additions & 1 deletion
@@ -1,3 +1,5 @@
 # Ignore all .exe files
 *.exe
-*.out
+*.out
+*.json
+*.jsonl

paper_reproduced/related_works/cuDF/bestbuy.py

Lines changed: 5 additions & 5 deletions
@@ -156,11 +156,11 @@ def measure_parsing_time(json_file_path, description, query_version):
 # json_file_path_twitter = '/rhome/aveda002/bigdata/Test-Files/twitter_small_records_remove.json'
 # json_file_path_google = '/rhome/aveda002/bigdata/Test-Files/google_map_small_records_remove.json'
 # json_file_path_bestbuy = '/rhome/aveda002/bigdata/Test-Files/bestbuy_small_records_remove.json'
-json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
-json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
-json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
-json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
-json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
+# json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
+# json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
+# json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
+# json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
+# json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
 json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'

 # Measure and print the parsing times
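All six cuDF scripts touched by this commit follow the same convention: exactly one `json_file_path_*` assignment is left active, so each script benchmarks a single dataset via `measure_parsing_time`. The body of that function is outside this diff; the following is only a minimal sketch of what such a harness plausibly looks like, assuming the inputs are JSON Lines files and that cuDF is installed with a working GPU:

```python
# Hypothetical sketch only -- the real measure_parsing_time body is not
# shown in this commit. Assumes JSON Lines input and an available GPU.
import time
import cudf

def measure_parsing_time(json_file_path, description, query_version):
    start = time.perf_counter_ns()
    df = cudf.read_json(json_file_path, lines=True)  # parse on the GPU
    elapsed = time.perf_counter_ns() - start
    print(f"[{description}] parsed {len(df)} records in {elapsed} ns "
          f"(query version {query_version})")
    return df

measure_parsing_time('../../../dataset/bestbuy_small_records_remove.json',
                     'bestbuy', 0)
```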

paper_reproduced/related_works/cuDF/google.py

Lines changed: 5 additions & 5 deletions
@@ -159,12 +159,12 @@ def measure_parsing_time(json_file_path, description, query_version):
 # json_file_path_google = '/rhome/aveda002/bigdata/Test-Files/google_map_small_records_remove.json'
 # json_file_path_bestbuy = '/rhome/aveda002/bigdata/Test-Files/bestbuy_small_records_remove.json'

-json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
-json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
-json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
-json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
+# json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
+# json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
+# json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
+# json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
 json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
-json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'
+# json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'


 # Measure and print the parsing times

paper_reproduced/related_works/cuDF/nspl.py

Lines changed: 5 additions & 5 deletions
@@ -158,11 +158,11 @@ def measure_parsing_time(json_file_path, description, query_version):
 # json_file_path_bestbuy = '/rhome/aveda002/bigdata/Test-Files/bestbuy_small_records_remove.json'

 json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
-json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
-json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
-json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
-json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
-json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'
+# json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
+# json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
+# json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
+# json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
+# json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'

 # Measure and print the parsing times
 measure_parsing_time(json_file_path_wiki, "nspl", 0)

paper_reproduced/related_works/cuDF/twitter.py

Lines changed: 7 additions & 11 deletions
@@ -144,24 +144,20 @@ def measure_parsing_time(json_file_path, description, query_version):
     print(f"Time taken to execute query: {query_time} ns\n")


-
-
-
-
-
 # Paths to JSON files
 # json_file_path_nspl = '/rhome/aveda002/bigdata/Test-Files/wiki_small_records_remove.json'
 # json_file_path_wiki = '/rhome/aveda002/bigdata/Test-Files/wiki_small_records_remove.json'
 # json_file_path_walmart = '/rhome/aveda002/bigdata/Test-Files/walmart_small_records_remove.json'
 # json_file_path_twitter = '/rhome/aveda002/bigdata/Test-Files/twitter_small_records_remove.json'
 # json_file_path_google = '/rhome/aveda002/bigdata/Test-Files/google_map_small_records_remove.json'
 # json_file_path_bestbuy = '/rhome/aveda002/bigdata/Test-Files/bestbuy_small_records_remove.json'
-json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
-json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
-json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
-json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
-json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
-json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'
+# json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
+# json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
+# json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
+json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
+# json_file_path_twitter = '../../../dataset/twitter_sample_small_records.json' # /rhome/aveda002/bigdata/cuJSON/dataset/twitter_sample_large_record.json
+# json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
+# json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'


paper_reproduced/related_works/cuDF/walmart.py

Lines changed: 5 additions & 5 deletions
@@ -158,12 +158,12 @@ def measure_parsing_time(json_file_path, description, query_version):
 # json_file_path_google = '/rhome/aveda002/bigdata/Test-Files/google_map_small_records_remove.json'
 # json_file_path_bestbuy = '/rhome/aveda002/bigdata/Test-Files/bestbuy_small_records_remove.json'

-json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
-json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
+# json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
+# json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
 json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
-json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
-json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
-json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'
+# json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
+# json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
+# json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'


 # Measure and print the parsing times

paper_reproduced/related_works/cuDF/wiki.py

Lines changed: 5 additions & 5 deletions
@@ -155,12 +155,12 @@ def measure_parsing_time(json_file_path, description, query_version):
 # json_file_path_google = '/rhome/aveda002/bigdata/Test-Files/google_map_small_records_remove.json'
 # json_file_path_bestbuy = '/rhome/aveda002/bigdata/Test-Files/bestbuy_small_records_remove.json'

-json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
+# json_file_path_nspl = '../../../dataset/nspl_small_records_remove.json'
 json_file_path_wiki = '../../../dataset/wiki_small_records_remove.json'
-json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
-json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
-json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
-json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'
+# json_file_path_walmart = '../../../dataset/walmart_small_records_remove.json'
+# json_file_path_twitter = '../../../dataset/twitter_small_records_remove.json'
+# json_file_path_google = '../../../dataset/google_map_small_records_remove.json'
+# json_file_path_bestbuy = '../../../dataset/bestbuy_small_records_remove.json'


 # Measure and print the parsing times

paper_reproduced/scripts/README.md

Lines changed: 27 additions & 27 deletions
@@ -3,26 +3,26 @@

 All runs were executed 10 times to calculate the average results for each method.

-## 📊 Fig 9 and Fig 10 (cuJSON vs CPU Methods)
+## 📊 Fig 9 and Fig 10 (cujson vs CPU Methods)

 To run each method separately and get the results for each dataset (executed 10 times), use the following commands:

-- **cuJSON**
+- **cujson**
 ```bash
 ./run_cujson_fig9.sh
 ```

-- **simdJSON**
+- **simdjson**
 ```bash
 ./run_simdjson_fig9.sh
 ```

-- **RapidJSON**
+- **rapidjson**
 ```bash
 ./run_rapidjson_fig9.sh
 ```

-- **Pison**
+- **pison**
 ```bash
 ./run_pison_fig9.sh
 ```
@@ -43,18 +43,18 @@ run_all_fig9.sh

 ---

-## 📈 Fig 11 (cuJSON vs GPU Methods)
+## 📈 Fig 11 (cujson vs GPU Methods)

 To run each method separately for Fig 11:

-- **GPJSON**
+- **gpjson**
 ```bash
 ./run_gpjson_fig11.sh
 ```

-- **cuDF**
+- **cudf**
 ```bash
-./run_cuDF_fig11.sh
+./run_cudf_fig11.sh
 ```

 If you'd like to run both methods together, use the following:
@@ -76,17 +76,17 @@ run_all_fig11.sh

 To report peak GPU memory usage for each method:

-- **cuJSON**
+- **cujson**
 ```bash
 ./run_cujson_fig12.sh
 ```

-- **GPJSON**
+- **gpjson**
 ```bash
 ./run_gpjson_fig12.sh
 ```

-- **cuDF**
+- **cudf**
 ```bash
 ./run_cudf_fig12.sh
 ```
@@ -108,9 +108,9 @@ To run all methods together:

 ---

-## ⏱️ Fig 13 and Table 8 (cuJSON only)
+## ⏱️ Fig 13 and Table 8 (cujson only)

-For time-breakdown reporting with **cuJSON**, execute the following:
+For time-breakdown reporting with **cujson**, execute the following:

 ```bash
 ./run_cujson_fig13.sh
@@ -129,32 +129,32 @@ For time-breakdown reporting with **cuJSON**, execute the following:

 To run each method separately and report memory usage:

-- **cuJSON**
+- **cujson**
 ```bash
 ./run_cujson_fig14.sh
 ```

-- **simdJSON**
+- **simdjson**
 ```bash
 ./run_simdjson_fig14.sh
 ```

-- **RapidJSON**
+- **rapidjson**
 ```bash
 ./run_rapidjson_fig14.sh
 ```

-- **Pison**
+- **pison**
 ```bash
 ./run_pison_fig14.sh
 ```

-- **cuDF/MetaJSON**
+- **cudf/MetaJSON**
 ```bash
 ./run_cudf_fig14.sh
 ```

-> **Note**: To compute results for **GPJSON**, the library requires modifications. This library lead to no output and it required to modify the source code after installation. However, Theoritcally, the best possible results of `GPJSON` will be as same as `pison`, but by modification we realize it is even worst than `pison`.
+> **Note**: To compute results for **gpjson**, the library requires modification: out of the box it produces no output, so the source code must be edited after installation. In theory, the best possible results for `gpjson` would match `pison`, but after our modifications we found it performs even worse than `pison`.

 To run all methods together:

@@ -179,7 +179,7 @@ To run all methods together:

 To report the average time for running the queries, execute the scripts for each method:

-- **cuJSON**
+- **cujson**
 ```bash
 ./run_cujson_fig15.sh
 ```
@@ -189,12 +189,12 @@ To report the average time for running the queries, execute the scripts for each
 ./run_simdjson_fig15.sh
 ```

-- **Pison**
+- **pison**
 ```bash
 ./run_pison_fig15.sh
 ```

-- **RapidJSON**
+- **rapidjson**
 ```bash
 ./run_rapidjson_fig15.sh
 ```
@@ -219,14 +219,14 @@ To run all methods together:

 ### Middle and Right: Modifying JSON Files

-For these sections, you will need to modify the JSON file to contain only one record to compute the query time. Library modifications are required for **GPJSON** and **cuDF**, so ensure you install the libraries and use them accordingly.
+For these sections, you will need to modify the JSON file to contain only one record to compute the query time. Library modifications are required for **gpjson** and **cudf**, so ensure you install the libraries and use them accordingly.


 ---

-## 🧑‍💻 Fig 16: cuJSON Scalability
+## 🧑‍💻 Fig 16: cujson Scalability

-To run **cuJSON** scalability tests:
+To run **cujson** scalability tests:

 1. First, download the scalability data.
 2. Then, execute the following:
@@ -244,7 +244,7 @@ To run **cuJSON** scalability tests:
 ## General Notes:

 - All scripts assume you have the necessary dependencies installed.
-- For `GPJSON` after you install the required library files, you have to add your keys in the scripts: `scripts/run_gpjson_fig11.sh` and `scripts/run_gpjson_fig12.sh`.
+- For `gpjson`, after you install the required library files, add your keys in the scripts `scripts/run_gpjson_fig11.sh` and `scripts/run_gpjson_fig12.sh`.
 - The `scripts/results` folder will contain all output files, categorized by method, along with CSV summaries.
 - For specific modifications or troubleshooting, refer to the individual script files or the README of each related work for more details.
 - Make sure to download all the datasets from https://drive.google.com/drive/folders/1PkDEy0zWOkVREfL7VuINI-m9wJe45P2Q?usp=sharing and place them into the `dataset` folder. The `scalability` folder must be placed exactly as it appears in the `dataset` folder for the experiments to run properly.
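The `run_*` scripts referenced in this README are not part of this commit, but the repeat-and-average protocol it describes ("executed 10 times") reduces to a simple loop. A minimal bash sketch of that pattern, with a hypothetical benchmark command standing in for the real one:

```bash
# Hypothetical sketch of the 10-run averaging protocol described above.
# ./benchmark_binary is a stand-in; the real commands live in the run_* scripts.
total=0
for i in $(seq 1 10); do
    t=$(/usr/bin/time -f "%e" ./benchmark_binary ../dataset/bestbuy_small_records_remove.json 2>&1 >/dev/null)
    total=$(echo "$total + $t" | bc)
done
echo "average over 10 runs: $(echo "scale=3; $total / 10" | bc) s"
```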

paper_reproduced/scripts/figure_generator.sh

Lines changed: 3 additions & 17 deletions
@@ -1,22 +1,8 @@
-#!/bin/bash -l
-
-#SBATCH --nodes=1
-#SBATCH --ntasks=1
-#SBATCH --cpus-per-task=4
-#SBATCH --output="result-figure-generation.log"
-#SBATCH --mem=8G
-#SBATCH -p short_gpu
-#SBATCH --gres=gpu:ada6000:1
-#SBATCH --time=01:00:00
-
-# -------------------------------
-# Step 1: Load necessary modules
-# -------------------------------
-module load slurm
-module load cuda/11.8
+#!/bin/bash
+set -e

 # -------------------------------
-# Step 3: Run the external Python script to generate the figure
+# Run the external Python script to generate the figure
 # -------------------------------
 python plot_fig9.py
 python plot_fig11.py
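With the SLURM directives removed, the generator presumably no longer needs a scheduler and can be invoked directly on any machine where the `plot_fig*.py` scripts and their Python dependencies are available:

```bash
# Run from the scripts directory; assumes the results CSVs have already
# been produced by the run_* scripts above.
cd paper_reproduced/scripts
bash figure_generator.sh
```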
