// seed.ts — reads training_data.csv and upserts its rows into an Upstash Vector index.
import fs from "fs"
import csv from 'csv-parser';
import { Index } from "@upstash/vector"
/** One parsed CSV record; `text` is the field the seeding loop sends to the index. */
interface Row {
  text: string;
}
// Upstash Vector client used by the seeding script.
// Credentials are read from the environment rather than committed to source
// control. NOTE(review): the token that was previously hard-coded here has been
// exposed and should be rotated in the Upstash console.
const upstashUrl = process.env.UPSTASH_VECTOR_REST_URL;
const upstashToken = process.env.UPSTASH_VECTOR_REST_TOKEN;
if (!upstashUrl || !upstashToken) {
  // Fail fast with an actionable message instead of a later, opaque HTTP error.
  throw new Error(
    "UPSTASH_VECTOR_REST_URL and UPSTASH_VECTOR_REST_TOKEN must be set to seed the index",
  );
}
const index = new Index({
  url: upstashUrl,
  token: upstashToken,
});
/**
 * Reads a comma-separated file and collects every parsed record into an array.
 *
 * Records are pushed as emitted by the CSV parser; they are not validated, so
 * the `Row` shape is an assumption about the file's header (a `text` column).
 *
 * @param filepath - Path of the CSV file to read.
 * @returns A promise that resolves with all rows once the parser emits `end`,
 *   and rejects if either the file stream or the parser emits `error`.
 */
async function parseCSV(filepath: string): Promise<Row[]> {
  return new Promise<Row[]>((resolve, reject) => {
    const rows: Row[] = [];
    fs.createReadStream(filepath)
      // `pipe` does not forward errors from the source stream, so a missing or
      // unreadable file must be handled here or it surfaces as an unhandled
      // `error` event instead of a rejected promise.
      .on("error", reject)
      .pipe(csv({ separator: "," }))
      .on("data", (row: Row) => {
        rows.push(row);
      })
      .on("error", reject)
      .on("end", () => {
        resolve(rows);
      });
  });
}
// Number of rows sent to the index per upsert request.
const batch = 30

/**
 * Loads `training_data.csv` and upserts its rows into the vector index in
 * sequential batches of `batch` rows.
 *
 * Each row's id is its zero-based position in the parsed file, and its `text`
 * is sent both as the `data` field and as `metadata.text`.
 *
 * @returns A promise that resolves once every batch has been upserted; it
 *   rejects if parsing the CSV or any upsert call fails.
 */
const seed = async (): Promise<void> => {
  const data = await parseCSV("training_data.csv");
  // Advance by a whole batch per iteration so each row is sent exactly once;
  // stepping by one would re-upsert overlapping windows of the same rows.
  for (let i = 0; i < data.length; i += batch) {
    const chunk = data.slice(i, i + batch);
    const formatted = chunk.map((row, batchIndex) => ({
      data: row.text,
      id: i + batchIndex,
      metadata: { text: row.text },
    }));
    await index.upsert(formatted);
  }
};
// Script entry point: run the seeding job and surface any failure with context
// and a non-zero exit code instead of leaving the promise rejection unhandled.
seed().catch((err: unknown) => {
  console.error("Seeding failed:", err);
  process.exitCode = 1;
});