Skip to content

Commit 24fecff

Browse files
committed
added mit license and documentation in readme
1 parent dcd0594 commit 24fecff

File tree

5 files changed

+244
-61
lines changed

5 files changed

+244
-61
lines changed

.eslintrc.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"rules": {
1515
"prettier/prettier": ["error"],
1616
"no-useless-constructor": "off",
17+
"no-redeclare": "off",
1718
"@typescript-eslint/no-useless-constructor": ["error"],
1819
"no-use-before-define": "off",
1920
"@typescript-eslint/no-use-before-define": [

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2020 - present, Roshan Acharya
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
# jsonFromTable
2+
3+
Converts html tables to JSON and JS objects
4+
5+
## Installation
6+
7+
Install via npm
8+
9+
```sh
10+
npm install jsonfromtable
11+
```
12+
13+
## Usage
14+
15+
```js
16+
const { jsonFromTable } = require('jsonfromtable')
17+
18+
const obj = jsonFromTable({
19+
html: `<table>...</table>`,
20+
})
21+
22+
const json = jsonFromTable({
23+
html: `<table>...</table>`,
24+
format: 'json',
25+
})
26+
27+
const arr = jsonFromTable({
28+
html: `<table>...</table>`,
29+
format: 'array',
30+
})
31+
32+
const [headers, body] = jsonFromTable({
33+
html: `<table>...</table>`,
34+
format: 'raw',
35+
})
36+
```
37+
38+
The `jsonFromTable` function accepts a single argument, `options`:
39+
40+
```ts
41+
interface Options {
42+
url?: string // url of the page which contains the table
43+
html?: string // html which contains table
44+
selector?: string // table selector
45+
hSelector?: string // head selector
46+
bSelector?: [string, string] // body selector [row, td]
47+
format?: 'json' | 'array' | 'raw' | 'object' // output format
48+
}
49+
```
50+
51+
## Options
52+
53+
### url
54+
55+
If you want the output from a URL, pass the `url` option. The URL should point to a webpage that contains a table. If the `url` option is passed, the function returns a promise.
56+
57+
```js
58+
;(async () => {
59+
const obj = await jsonFromTable({ url: 'https://example.com' })
60+
61+
console.log(obj)
62+
})()
63+
```
64+
65+
### html
66+
67+
If you want the output from an HTML string, pass the `html` option. The HTML should contain a `table` tag.
68+
69+
```js
70+
const obj = jsonFromTable({
71+
html: `<table>...</table>`,
72+
})
73+
74+
console.log(obj)
75+
```
76+
77+
### format
78+
79+
If you want JSON, array, or raw output, pass the `format` option. The default value is `object`.
80+
81+
```js
82+
const json = jsonFromTable({
83+
html: `<table>...</table>`,
84+
format: 'json',
85+
})
86+
87+
jsonFromTable({
88+
url: `https://example.com`,
89+
format: 'array',
90+
}).then((arr) => console.log(arr))
91+
92+
const [headers, body] = jsonFromTable({
93+
html: `<table>...</table>`,
94+
format: 'raw',
95+
})
96+
```
97+
98+
### selector
99+
100+
If the page has more than one table, you can pass the CSS selector of the desired table as `selector`.
101+
102+
```js
103+
const html = `
104+
<html>
105+
<table>...</table>
106+
<table class="table">...</table>
107+
</html>
108+
`
109+
110+
const obj = jsonFromTable({
111+
html: html,
112+
selector: '.table',
113+
})
114+
115+
console.log(obj)
116+
```
117+
118+
### hSelector
119+
120+
By default, `tr:first-child th` is used to get the headings from the table. Sometimes that selector may not give you the best result. In such cases, you can provide a CSS selector that selects all the headings.
121+
122+
```js
123+
const obj = jsonFromTable({
124+
html: `<table>...</table>`,
125+
hSelector: `thead tr:first-child th`,
126+
})
127+
128+
console.log(obj)
129+
```
130+
131+
### bSelector
132+
133+
By default, `['tr:not(:first-child)', 'td']` is used to get the body from the table. Sometimes that selector may not give you the best result. In such cases, you can provide your own CSS selectors as a `[row, cell]` pair.
134+
135+
```js
136+
const obj = jsonFromTable({
137+
html: `<table>...</table>`,
138+
bSelector: ['tbody tr:not(:first-child)', 'td'],
139+
})
140+
141+
console.log(obj)
142+
```
143+
144+
> Note that if the provided `hSelector` and `bSelector` fail to select the headers/body, then the following selectors will be tried, in order, to get the headers and body.
145+
146+
```js
147+
const hSelectors = [
148+
'thead tr:first-child th',
149+
'tr:first-child th',
150+
'tr:first-child td',
151+
]
152+
const bSelectors = [
153+
['tbody tr', 'td'],
154+
['tr:not(:first-child)', 'td'],
155+
['tr', 'td'],
156+
]
157+
```
158+
159+
## License
160+
161+
MIT

src/index.ts

Lines changed: 41 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,108 +1,108 @@
1-
import fetch from "node-fetch";
2-
import cheerio, { Cheerio, CheerioAPI, Node } from "cheerio";
3-
import { Format, output, Result } from "./utils";
1+
import fetch from 'node-fetch'
2+
import cheerio, { Cheerio, CheerioAPI, Node } from 'cheerio'
3+
import { Format, output, Result } from './utils'
44

55
interface Options {
6-
url?: string;
7-
html?: string;
8-
selector?: string;
9-
hSelector?: string;
10-
bSelector?: string[];
11-
format?: Format;
6+
url?: string
7+
html?: string
8+
selector?: string
9+
hSelector?: string
10+
bSelector?: [string, string]
11+
format?: Format
1212
}
1313

1414
/**
1515
* Get JSON, Object, Array from html tables
1616
*
1717
* @param options Options
1818
*/
19-
function jsonFromTable<T extends Format = "object">(
19+
function jsonFromTable<T extends Format = 'object'>(
2020
options: { url: string; format?: T } & Options
21-
): Promise<Result<T>>;
21+
): Promise<Result<T>>
2222

23-
function jsonFromTable<T extends Format = "object">(
23+
function jsonFromTable<T extends Format = 'object'>(
2424
options: { html: string; format?: T } & Options
25-
): Result<T>;
25+
): Result<T>
2626

2727
function jsonFromTable<T extends Format>(options: Options = {}) {
2828
const {
2929
html,
3030
url,
31-
selector = "table",
32-
hSelector = "tr:first-child th",
33-
bSelector = ["tr:not(:first-child)", "td"],
34-
format = "object",
35-
} = options;
31+
selector = 'table',
32+
hSelector = 'tr:first-child th',
33+
bSelector = ['tr:not(:first-child)', 'td'],
34+
format = 'object',
35+
} = options
3636
// prettier-ignore
3737
const hSelectors = [hSelector, "thead tr:first-child th", "tr:first-child th", "tr:first-child td"];
3838
// prettier-ignore
3939
const bSelectors = [bSelector, ["tbody tr", "td"], ["tr:not(:first-child)", "td"], ["tr", "td"]];
4040

4141
if (html) {
42-
return htmlTableToJson(html);
42+
return htmlTableToJson(html)
4343
} else if (url) {
4444
return fetch(url).then(async (res) => {
45-
const html = await res.text();
46-
return htmlTableToJson(html);
47-
});
45+
const html = await res.text()
46+
return htmlTableToJson(html)
47+
})
4848
} else {
49-
throw new Error(`You need to provide at least a url or html`);
49+
throw new Error(`You need to provide at least a url or html`)
5050
}
5151

5252
function htmlTableToJson(html: string) {
53-
const $ = cheerio.load(html);
53+
const $ = cheerio.load(html)
5454

55-
const table = $(selector);
55+
const table = $(selector)
5656

5757
if (table.html() === null)
58-
throw new Error(`Couldn't find table with selector "${selector}"`);
58+
throw new Error(`Couldn't find table with selector "${selector}"`)
5959

60-
let headers = getHeaders($, table, hSelectors);
61-
let body = getBody($, table, bSelectors);
60+
const headers = getHeaders($, table, hSelectors)
61+
const body = getBody($, table, bSelectors)
6262

6363
if (headers.values.length !== body.values.length) {
6464
console.warn(
6565
`Length of body and head is not same:\nHeader: ${headers.values.length}\nBody: ${body.values.length}`
66-
);
66+
)
6767
}
6868

69-
return output(headers, body, format) as Result<T>;
69+
return output(headers, body, format) as Result<T>
7070
}
7171
}
7272

7373
function getHeaders($: CheerioAPI, table: Cheerio<Node>, selectors: string[]) {
7474
for (const selector of selectors) {
75-
const list = $(selector, table.html());
75+
const list = $(selector, table.html())
7676

7777
if (list.html() !== null) {
78-
const values = list.toArray().map((v) => $(v).text().trim());
79-
return values;
78+
const values = list.toArray().map((v) => $(v).text().trim())
79+
return values
8080
}
8181
}
8282

83-
return [];
83+
return []
8484
}
8585

8686
function getBody($: CheerioAPI, table: Cheerio<Node>, selectors: string[][]) {
8787
for (const selector of selectors) {
88-
const rows = $(selector[0], table.html()).toArray();
88+
const rows = $(selector[0], table.html()).toArray()
8989

9090
if (rows.length > 0) {
91-
let values: any[] = [];
91+
const values: string[][] = []
9292

9393
for (const row of rows) {
9494
const tds = $(selector[1], $(row).html())
9595
.toArray()
96-
.map((v) => $(v).text());
96+
.map((v) => $(v).text())
9797

98-
values.push(tds);
98+
values.push(tds)
9999
}
100100

101-
return values;
101+
return values
102102
}
103103
}
104104

105-
return [];
105+
return []
106106
}
107107

108-
export { jsonFromTable, Format, Result, Options, output };
108+
export { jsonFromTable, Format, Result, Options, output }

0 commit comments

Comments
 (0)