Skip to content

Commit

Permalink
added feature to join same-tag siblings
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Franzl committed Nov 21, 2017
1 parent 229b698 commit b2b50e0
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ Each node is processed in the following sequence:
| [opts.allow_attributes_by_tag] | [<code>TagAttributeNameSpec</code>](#TagAttributeNameSpec) | <code>{}</code> | Matching attribute names of a matching node are kept. Other attributes are removed. |
| [opts.allow_classes_by_tag] | [<code>TagClassNameSpec</code>](#TagClassNameSpec) | <code>{}</code> | Matching class names of a matching node are kept. Other class names are removed. If no class names are remaining, the class attribute is removed. |
| [opts.remove_empty] | <code>boolean</code> | <code>false</code> | Remove nodes which are completely empty or contain only white space. |
| [opts.join_siblings] | [<code>Array.&lt;Tagname&gt;</code>](#Tagname) | <code>[]</code> | Join adjacent sibling nodes that share the same tag name, for each of the given tag names. Siblings separated only by whitespace text are still joined; siblings separated by non-whitespace text nodes are not. |

<a name="DomDocument"></a>

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "sanitize-dom",
"version": "1.0.0",
"version": "1.0.1",
"description": "",
"main": "src/index.js",
"directories": {
Expand Down
51 changes: 51 additions & 0 deletions src/sanitize-dom.js
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
* @param {TagAttributeNameSpec} [opts.allow_attributes_by_tag={}] - Matching attribute names of a matching node are kept. Other attributes are removed.
* @param {TagClassNameSpec} [opts.allow_classes_by_tag={}] - Matching class names of a matching node are kept. Other class names are removed. If no class names are remaining, the class attribute is removed.
* @param {boolean} [opts.remove_empty=false] Remove nodes which are completely empty or contain only white space.
* @param {Tagname[]} [opts.join_siblings=[]] - Join adjacent sibling nodes that share the same tag name, for each of the given tag names. Siblings separated only by whitespace text are still joined; siblings separated by non-whitespace text nodes are not.
*
*/
function sanitizeDom(
Expand Down Expand Up @@ -190,6 +191,10 @@ function sanitizeDom(
if (!opts.allow_attributes_by_tag) opts.allow_attributes_by_tag = {};
if (!opts.allow_classes_by_tag) opts.allow_classes_by_tag = {};

if (!opts.join_siblings) opts.join_siblings = [];





var parents = [];
Expand Down Expand Up @@ -366,6 +371,48 @@ function sanitizeDom(
nd.remove();
}

/**
 * Merge runs of same-tag sibling nodes that are direct children of `parent`.
 *
 * Two siblings are merged when they have the same `nodeName`, that name is
 * listed in `tags`, and they are either directly adjacent or separated by a
 * single text node that is empty or contains only white space. The children
 * of the later sibling (and the separating text node, if any) are moved to
 * the end of the earlier sibling, and the later sibling is removed.
 *
 * Only children are moved; the removed sibling itself (including whatever
 * it carried besides children) is discarded.
 *
 * The list is walked once, iteratively, so arbitrarily long runs of joinable
 * siblings cannot exhaust the call stack.
 *
 * @param {Node} parent - Node whose direct children are examined.
 * @param {string[]} tags - Node names (as reported by `nodeName`) that may
 *   be joined.
 */
function joinSiblings(parent, tags) {
  // Work on a private snapshot so that removing nodes while walking cannot
  // shift the indices (assumes childrenOf() yields the child nodes in
  // document order -- it is defined elsewhere in this file).
  const children = Array.from(childrenOf(parent));

  const isBlankText = (node) =>
    node.nodeType === 3 && /^\s*$/.test(node.textContent);

  // Move every child of `source` to the end of `target`, then drop `source`.
  const absorb = (target, source) => {
    for (const child of Array.from(childrenOf(source))) {
      target.appendChild(child);
    }
    source.remove();
  };

  let i = 0;
  while (i < children.length) {
    const nd = children[i];
    let next = i + 1; // index of the first sibling not yet absorbed into nd

    if (tags.includes(nd.nodeName)) {
      while (next < children.length) {
        const nd1 = children[next];
        const nd2 = children[next + 1];

        if (nd1.nodeName === nd.nodeName) {
          // Directly adjacent same-tag sibling.
          absorb(nd, nd1);
          next += 1;
        } else if (nd2 && isBlankText(nd1) && nd2.nodeName === nd.nodeName) {
          // Same-tag sibling separated only by blank text: keep the
          // separator inside the joined node.
          nd.appendChild(nd1);
          absorb(nd, nd2);
          next += 2;
        } else {
          break;
        }
      }
    }

    i = next;
  }
}

function sanitizeNode(nd) {
if (nd.sanitize_skip) {
delete nd.sanitize_skip;
Expand Down Expand Up @@ -449,6 +496,10 @@ function sanitizeDom(
nd.remove();
}
}

if (opts.join_siblings.length > 0) {
joinSiblings(parent, opts.join_siblings);
}
}
}

Expand Down
40 changes: 40 additions & 0 deletions tests/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,46 @@ describe('initialization', function() {

});

describe('join_siblings', function() {

  // Every case runs with the same options: B and I are joinable, and a
  // catch-all allow_tags_direct pattern is supplied. A fresh object is built
  // per call so that no case can observe changes made to a previous one.
  function joinOptions() {
    return {
      join_siblings: ['B', 'I'],
      allow_tags_direct: {
        '.*': '.*',
      }
    };
  }

  const cases = [
    {
      title: 'should join same-tag siblings of specified tags',
      input: '<b>abc</b> <b>def</b> <i>jkl</i>',
      expected: '<b>abc def</b> <i>jkl</i>'
    },
    {
      title: 'should join same-tag siblings of specified tags and leave children intact',
      input: '<b>abc</b> <b>def <i>ghi</i></b><b>jkl</b>',
      expected: '<b>abc def <i>ghi</i>jkl</b>'
    },
    {
      title: 'should not join same-tag siblings when separated by non-whitespace text',
      input: '<b>abc</b> x <b>def</b> <b>ghi <i>jkl</i></b><b>mno</b>',
      expected: '<b>abc</b> x <b>def ghi <i>jkl</i>mno</b>'
    }
  ];

  cases.forEach(function(testCase) {
    it(testCase.title, function() {
      const actual = sanitizeHtml(testCase.input, joinOptions());
      assert.equal(actual, testCase.expected);
    });
  });
});

describe('allow_tags', function() {

it('should flatten all markup by default', function() {
Expand Down

0 comments on commit b2b50e0

Please sign in to comment.