/**
 * SaxParser.js
 *
 * Copyright, Moxiecode Systems AB
 * Released under LGPL License.
 *
 * License: http://www.tinymce.com/license
 * Contributing: http://www.tinymce.com/contributing
 */

/**
 * This class parses HTML code using pure JavaScript and executes various events for each item it finds. It will
 * always execute the events in the right order for tag soup code like <p><b></p></b>. It will also remove elements
 * and attributes that don't fit the schema if the validate setting is enabled.
 *
 * @example
 * var parser = new tinymce.html.SaxParser({
 *     validate: true,
 *
 *     comment: function(text) {
 *         console.log('Comment:', text);
 *     },
 *
 *     cdata: function(text) {
 *         console.log('CDATA:', text);
 *     },
 *
 *     text: function(text, raw) {
 *         console.log('Text:', text, 'Raw:', raw);
 *     },
 *
 *     start: function(name, attrs, empty) {
 *         console.log('Start:', name, attrs, empty);
 *     },
 *
 *     end: function(name) {
 *         console.log('End:', name);
 *     },
 *
 *     pi: function(name, text) {
 *         console.log('PI:', name, text);
 *     },
 *
 *     doctype: function(text) {
 *         console.log('DocType:', text);
 *     }
 * }, schema);
 * @class tinymce.html.SaxParser
 * @version 3.4
 */
define("tinymce/html/SaxParser", [
    "tinymce/html/Schema",
    "tinymce/html/Entities",
    "tinymce/util/Tools"
], function(Schema, Entities, Tools) {
    var each = Tools.each;

    /**
     * Constructs a new SaxParser instance.
     *
     * Each of the comment, cdata, text, start, end, pi and doctype settings is copied onto the
     * instance as a handler; a handler that is not supplied defaults to a no-op function.
     * The fix_self_closing setting is forced to true unless it is explicitly false. Note that this
     * is written back to the settings object passed in by the caller.
     *
     * @constructor
     * @method SaxParser
     * @param {Object} settings Name/value collection of settings. comment, cdata, text, start, end, pi and
     * doctype are callbacks. Optional, defaults to an empty object.
     * @param {tinymce.html.Schema} schema HTML Schema class to use when parsing. Optional, a new default
     * Schema instance is created when omitted.
     */
    return function(settings, schema) {
        var self = this, noop = function() {};

        settings = settings || {};
        self.schema = schema = schema || new Schema();

        // Anything other than an explicit false enables the self closing fix
        if (settings.fix_self_closing !== false) {
            settings.fix_self_closing = true;
        }

        // Add handler functions from settings and setup default handlers
        each('comment cdata text start end pi doctype'.split(' '), function(name) {
            if (name) {
                self[name] = settings[name] || noop;
            }
        });

        /**
         * Parses the specified HTML string and executes the callbacks for each item it finds.
         *
         * @example
         * new SaxParser({...}).parse('<b>text</b>');
         * @method parse
         * @param {String} html Html string to sax parse.
*/ self.parse = function(html) { var self = this, matches, index = 0, value, endRegExp, stack = [], attrList, i, text, name; var isInternalElement, removeInternalElements, shortEndedElements, fillAttrsMap, isShortEnded; var validate, elementRule, isValidElement, attr, attribsValue, validAttributesMap, validAttributePatterns; var attributesRequired, attributesDefault, attributesForced; var anyAttributesRequired, selfClosing, tokenRegExp, attrRegExp, specialElements, attrValue, idCount = 0; var decode = Entities.decode, fixSelfClosing, filteredAttrs = Tools.makeMap('src,href'); function processEndTag(name) { var pos, i; // Find position of parent of the same type pos = stack.length; while (pos--) { if (stack[pos].name === name) { break; } } // Found parent if (pos >= 0) { // Close all the open elements for (i = stack.length - 1; i >= pos; i--) { name = stack[i]; if (name.valid) { self.end(name.name); } } // Remove the open elements from the stack stack.length = pos; } } function parseAttribute(match, name, value, val2, val3) { var attrRule, i, trimRegExp = /[\s\u0000-\u001F]+/g; name = name.toLowerCase(); value = name in fillAttrsMap ? 
name : decode(value || val2 || val3 || ''); // Handle boolean attribute than value attribute // Validate name and value pass through all data- attributes if (validate && !isInternalElement && name.indexOf('data-') !== 0) { attrRule = validAttributesMap[name]; // Find rule by pattern matching if (!attrRule && validAttributePatterns) { i = validAttributePatterns.length; while (i--) { attrRule = validAttributePatterns[i]; if (attrRule.pattern.test(name)) { break; } } // No rule matched if (i === -1) { attrRule = null; } } // No attribute rule found if (!attrRule) { return; } // Validate value if (attrRule.validValues && !(value in attrRule.validValues)) { return; } } if (filteredAttrs[name] && !settings.allow_script_urls) { if (/(java|vb)script:/i.test(decodeURIComponent(value.replace(trimRegExp, '')))) { return; } } // Add attribute to list and map attrList.map[name] = value; attrList.push({ name: name, value: value }); } // Precompile RegExps and map objects tokenRegExp = new RegExp('<(?:' + '(?:!--([\\w\\W]*?)-->)|' + // Comment '(?:!\\[CDATA\\[([\\w\\W]*?)\\]\\]>)|' + // CDATA '(?:!DOCTYPE([\\w\\W]*?)>)|' + // DOCTYPE '(?:\\?([^\\s\\/<>]+) ?([\\w\\W]*?)[?/]>)|' + // PI '(?:\\/([^>]+)>)|' + // End element '(?:([A-Za-z0-9\\-\\:\\.]+)((?:\\s+[^"\'>]+(?:(?:"[^"]*")|(?:\'[^\']*\')|[^>]*))*|\\/|\\s+)>)' + // Start element ')', 'g'); attrRegExp = /([\w:\-]+)(?:\s*=\s*(?:(?:\"((?:[^\"])*)\")|(?:\'((?:[^\'])*)\')|([^>\s]+)))?/g; // Setup lookup tables for empty elements and boolean attributes shortEndedElements = schema.getShortEndedElements(); selfClosing = settings.self_closing_elements || schema.getSelfClosingElements(); fillAttrsMap = schema.getBoolAttrs(); validate = settings.validate; removeInternalElements = settings.remove_internals; fixSelfClosing = settings.fix_self_closing; specialElements = schema.getSpecialElements(); while ((matches = tokenRegExp.exec(html))) { // Text if (index < matches.index) { self.text(decode(html.substr(index, matches.index - index))); 
} if ((value = matches[6])) { // End element value = value.toLowerCase(); // IE will add a ":" in front of elements it doesn't understand like custom elements or HTML5 elements if (value.charAt(0) === ':') { value = value.substr(1); } processEndTag(value); } else if ((value = matches[7])) { // Start element value = value.toLowerCase(); // IE will add a ":" in front of elements it doesn't understand like custom elements or HTML5 elements if (value.charAt(0) === ':') { value = value.substr(1); } isShortEnded = value in shortEndedElements; // Is self closing tag for example an