Skip to content

Commit

Permalink
Ensure isBlank checks for significant elements within the given node.
Browse files Browse the repository at this point in the history
DRY up the is/has functions and always use uppercase to prevent excessive toLowerCase calls
  • Loading branch information
domchristie committed May 12, 2020
1 parent 46fc45b commit ae5c87e
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 16 deletions.
9 changes: 5 additions & 4 deletions src/node.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { isBlock, isVoid, hasVoid } from './utilities'
import { isBlock, isVoid, hasVoid, isMeaningfulWhenBlank, hasMeaningfulWhenBlank } from './utilities'

export default function Node (node) {
node.isBlock = isBlock(node)
Expand All @@ -10,10 +10,11 @@ export default function Node (node) {

// Decides whether a node counts as blank: a conjunction of tag-name checks and
// a whitespace-only test on node.textContent.
// NOTE(review): this hunk is a rendered diff without +/- markers, so removed
// and added lines are interleaved and the expression is not valid JavaScript
// as shown. The hard-coded tag-name list looks like the removed side and the
// isMeaningfulWhenBlank / hasMeaningfulWhenBlank lines the added side —
// confirm against src/node.js.
function isBlank (node) {
return (
['A', 'TH', 'TD', 'IFRAME', 'SCRIPT', 'AUDIO', 'VIDEO'].indexOf(node.nodeName) === -1 &&
/^\s*$/i.test(node.textContent) &&
!isVoid(node) &&
!hasVoid(node)
!isMeaningfulWhenBlank(node) &&
/^\s*$/i.test(node.textContent) &&
!hasVoid(node) &&
!hasMeaningfulWhenBlank(node)
)
}

Expand Down
49 changes: 37 additions & 12 deletions src/utilities.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,28 +13,53 @@ export function repeat (character, count) {
}

// Tag names that isBlock looks a node's nodeName up in.
// NOTE(review): rendered diff — the lowercase and the uppercase list both
// appear here with no separator, so only one of them belongs in the real
// file. The uppercase list presumably is the added side (it matches the
// is() helper, which compares nodeName without lowercasing) — confirm.
export var blockElements = [
'address', 'article', 'aside', 'audio', 'blockquote', 'body', 'canvas',
'center', 'dd', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
'figure', 'footer', 'form', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
'header', 'hgroup', 'hr', 'html', 'isindex', 'li', 'main', 'menu', 'nav',
'noframes', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table',
'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul'
'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS',
'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE',
'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER',
'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES',
'NOSCRIPT', 'OL', 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD',
'TFOOT', 'TH', 'THEAD', 'TR', 'UL'
]

// Reports whether the node's nodeName is listed in blockElements.
// NOTE(review): rendered diff — two return lines are shown. The first
// (lowercasing nodeName before the lookup) appears to be the removed line and
// the second (delegating to is()) the added one; as written only the first
// would ever execute. Confirm against src/utilities.js.
export function isBlock (node) {
return blockElements.indexOf(node.nodeName.toLowerCase()) !== -1
return is(node, blockElements)
}

// Tag names that isVoid and hasVoid check against.
// NOTE(review): rendered diff — the lowercase and the uppercase list both
// appear here with no separator, so only one of them belongs in the real
// file. The uppercase list presumably is the added side — confirm.
export var voidElements = [
'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input',
'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT',
'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR'
]

// Reports whether the node's nodeName is listed in voidElements.
// NOTE(review): rendered diff — two return lines are shown. The first
// (lowercasing nodeName before the lookup) appears to be the removed line and
// the second (delegating to is()) the added one; as written only the first
// would ever execute. Confirm against src/utilities.js.
export function isVoid (node) {
return voidElements.indexOf(node.nodeName.toLowerCase()) !== -1
return is(node, voidElements)
}

// Comma-joined voidElements, used as a selector list by the querySelector
// variant of hasVoid below.
// NOTE(review): presumably on the removed side of the diff, since the has()
// variant does not use it — confirm.
var voidSelector = voidElements.join()
// Reports whether the node contains an element listed in voidElements.
// NOTE(review): rendered diff — two return lines are shown. The first
// (querySelector with voidSelector) appears to be the removed line and the
// second (delegating to has()) the added one; as written only the first would
// ever execute. Confirm against src/utilities.js.
export function hasVoid (node) {
return node.querySelector && node.querySelector(voidSelector)
return has(node, voidElements)
}

// Upper-case tag names that isMeaningfulWhenBlank and hasMeaningfulWhenBlank
// check against; isBlank uses those two checks to avoid reporting such
// elements (or nodes containing them) as blank.
var meaningfulWhenBlankElements = [
  // links
  'A',
  // tables and their parts
  'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD',
  // embedded content and scripts
  'IFRAME', 'SCRIPT', 'AUDIO', 'VIDEO'
]

/**
 * Tests whether the node itself is one of the meaningfulWhenBlankElements.
 *
 * @param {Node} node - node whose nodeName is looked up
 * @returns {boolean} true when node.nodeName is in the list
 */
export function isMeaningfulWhenBlank (node) {
  var tagNames = meaningfulWhenBlankElements
  return is(node, tagNames)
}

/**
 * Tests whether the node contains any of the meaningfulWhenBlankElements.
 *
 * @param {Node} node - node whose descendants are searched
 * @returns {*} truthy when a matching descendant exists, falsy otherwise
 */
export function hasMeaningfulWhenBlank (node) {
  var tagNames = meaningfulWhenBlankElements
  return has(node, tagNames)
}

/**
 * Checks whether node.nodeName is one of tagNames. The comparison is exact,
 * so the casing of tagNames has to match nodeName.
 *
 * @param {Node} node - node whose nodeName is looked up
 * @param {string[]} tagNames - candidate tag names
 * @returns {boolean} true when nodeName is found in tagNames
 */
function is (node, tagNames) {
  var position = tagNames.indexOf(node.nodeName)
  return position !== -1
}

/**
 * Reports whether node has at least one descendant element whose tag is in
 * tagNames.
 *
 * Always returns a strict boolean, including for nodes that do not expose
 * getElementsByTagName, so the result can be compared or stored without
 * relying on truthiness.
 *
 * @param {Node} node - node whose descendants are searched
 * @param {string[]} tagNames - tag names to look for
 * @returns {boolean} true when any tag name yields at least one descendant
 */
function has (node, tagNames) {
  // Nodes without getElementsByTagName cannot be searched this way.
  if (!node.getElementsByTagName) return false

  return tagNames.some(function (tagName) {
    return node.getElementsByTagName(tagName).length > 0
  })
}
10 changes: 10 additions & 0 deletions test/turndown-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,16 @@ test('keep rules are overridden by the standard rules', function (t) {
t.equal(turndownService.turndown('<p>Hello world</p>'), 'Hello world')
})

// A kept element whose textContent is blank must still be emitted as HTML
// when it contains a significant element (here an iframe).
test('keeping elements that have a blank textContent but contain significant elements', function (t) {
  t.plan(1)
  var html = '<figure><iframe src="http://example.com"></iframe></figure>'
  var service = new TurndownService()
  service.keep('figure')
  var markdown = service.turndown(html)
  t.equal(markdown, html)
})

test('keepReplacement can be customised', function (t) {
t.plan(1)
var turndownService = new TurndownService({
Expand Down

0 comments on commit ae5c87e

Please sign in to comment.