"use strict"; /*jshint boss:true*/ var namedCodepoints = require("./simple-html-tokenizer/char-refs").namedCodepoints; var objectCreate = require("./simple-html-tokenizer/helpers").objectCreate; var isSpace = require("./simple-html-tokenizer/helpers").isSpace; var isAlpha = require("./simple-html-tokenizer/helpers").isAlpha; var isUpper = require("./simple-html-tokenizer/helpers").isUpper; function preprocessInput(input) { return input.replace(/\r\n?/g, "\n"); } function Tokenizer(input) { this.input = preprocessInput(input); this.char = 0; this.line = 1; this.column = 0; this.state = 'data'; this.token = null; } Tokenizer.prototype = { tokenize: function() { var tokens = [], token; while (true) { token = this.lex(); if (token === 'EOF') { break; } if (token) { tokens.push(token); } } if (this.token) { tokens.push(this.token); } return tokens; }, tokenizePart: function(string) { this.input += preprocessInput(string); var tokens = [], token; while (this.char < this.input.length) { token = this.lex(); if (token) { tokens.push(token); } } this.tokens = (this.tokens || []).concat(tokens); return tokens; }, tokenizeEOF: function() { var token = this.token; if (token) { this.token = null; return token; } }, tag: function(Type, char) { var lastToken = this.token; this.token = new Type(char); this.state = 'tagName'; return lastToken; }, selfClosing: function() { this.token.selfClosing = true; }, attribute: function(char) { this.token.startAttribute(char); this.state = 'attributeName'; }, addToAttributeName: function(char) { this.token.addToAttributeName(char); }, addToAttributeValue: function(char) { this.token.addToAttributeValue(char); }, commentStart: function() { var lastToken = this.token; this.token = new CommentToken(); this.state = 'commentStart'; return lastToken; }, addToComment: function(char) { this.token.addChar(char); }, emitData: function() { this.addLocInfo(this.line, this.column - 1); var lastToken = this.token; this.token = null; this.state = 'tagOpen'; return lastToken; }, emitToken: function() { this.addLocInfo(); var lastToken = this.token.finalize(); this.token = null; this.state = 'data'; return lastToken; }, addData: function(char) { if (this.token === null) { this.token = new Chars(); this.markFirst(); } this.token.addChar(char); }, markFirst: function(line, column) { this.firstLine = (line === 0) ? 0 : (line || this.line); this.firstColumn = (column === 0) ? 0 : (column || this.column); }, addLocInfo: function(line, column) { if (!this.token) return; this.token.firstLine = this.firstLine; this.token.firstColumn = this.firstColumn; this.token.lastLine = (line === 0) ? 0 : (line || this.line); this.token.lastColumn = (column === 0) ? 0 : (column || this.column); }, consumeCharRef: function(allowedChar) { var matches; var input = this.input.slice(this.char); if (matches = input.match(/^#(?:x|X)([0-9A-Fa-f]+);/)) { this.char += matches[0].length; return String.fromCharCode(parseInt(matches[1], 16)); } else if (matches = input.match(/^#([0-9]+);/)) { this.char += matches[0].length; return String.fromCharCode(parseInt(matches[1], 10)); } else if (matches = input.match(/^([A-Za-z]+);/)) { var codepoints = namedCodepoints[matches[1]]; if (codepoints) { this.char += matches[0].length; for (var i = 0, str = ""; i < codepoints.length; i++) { str += String.fromCharCode(codepoints[i]); } return str; } } }, lex: function() { var char = this.input.charAt(this.char++); if (char) { if (char === "\n") { this.line++; this.column = 0; } else { this.column++; } // console.log(this.state, char); return this.states[this.state].call(this, char); } else { this.addLocInfo(this.line, this.column); return 'EOF'; } }, states: { data: function(char) { if (char === "<") { var chars = this.emitData(); this.markFirst(); return chars; } else if (char === "&") { this.addData(this.consumeCharRef() || "&"); } else { this.addData(char); } }, tagOpen: function(char) { if (char === "!") { this.state = 'markupDeclaration'; } else if (char === "/") { this.state = 'endTagOpen'; } else if (isAlpha(char)) { return this.tag(StartTag, char.toLowerCase()); } }, markupDeclaration: function(char) { if (char === "-" && this.input.charAt(this.char) === "-") { this.char++; this.commentStart(); } }, commentStart: function(char) { if (char === "-") { this.state = 'commentStartDash'; } else if (char === ">") { return this.emitToken(); } else { this.addToComment(char); this.state = 'comment'; } }, commentStartDash: function(char) { if (char === "-") { this.state = 'commentEnd'; } else if (char === ">") { return this.emitToken(); } else { this.addToComment("-"); this.state = 'comment'; } }, comment: function(char) { if (char === "-") { this.state = 'commentEndDash'; } else { this.addToComment(char); } }, commentEndDash: function(char) { if (char === "-") { this.state = 'commentEnd'; } else { this.addToComment("-" + char); this.state = 'comment'; } }, commentEnd: function(char) { if (char === ">") { return this.emitToken(); } else { this.addToComment("--" + char); this.state = 'comment'; } }, tagName: function(char) { if (isSpace(char)) { this.state = 'beforeAttributeName'; } else if (char === "/") { this.state = 'selfClosingStartTag'; } else if (char === ">") { return this.emitToken(); } else { this.token.addToTagName(char); } }, beforeAttributeName: function(char) { if (isSpace(char)) { return; } else if (char === "/") { this.state = 'selfClosingStartTag'; } else if (char === ">") { return this.emitToken(); } else { this.attribute(char); } }, attributeName: function(char) { if (isSpace(char)) { this.state = 'afterAttributeName'; } else if (char === "/") { this.state = 'selfClosingStartTag'; } else if (char === "=") { this.state = 'beforeAttributeValue'; } else if (char === ">") { return this.emitToken(); } else { this.addToAttributeName(char); } }, afterAttributeName: function(char) { if (isSpace(char)) { return; } else if (char === "/") { this.state = 'selfClosingStartTag'; } else if (char === "=") { this.state = 'beforeAttributeValue'; } else if (char === ">") { return this.emitToken(); } else { this.attribute(char); } }, beforeAttributeValue: function(char) { if (isSpace(char)) { return; } else if (char === '"') { this.state = 'attributeValueDoubleQuoted'; } else if (char === "'") { this.state = 'attributeValueSingleQuoted'; } else if (char === ">") { return this.emitToken(); } else { this.state = 'attributeValueUnquoted'; this.addToAttributeValue(char); } }, attributeValueDoubleQuoted: function(char) { if (char === '"') { this.state = 'afterAttributeValueQuoted'; } else if (char === "&") { this.addToAttributeValue(this.consumeCharRef('"') || "&"); } else { this.addToAttributeValue(char); } }, attributeValueSingleQuoted: function(char) { if (char === "'") { this.state = 'afterAttributeValueQuoted'; } else if (char === "&") { this.addToAttributeValue(this.consumeCharRef("'") || "&"); } else { this.addToAttributeValue(char); } }, attributeValueUnquoted: function(char) { if (isSpace(char)) { this.state = 'beforeAttributeName'; } else if (char === "&") { this.addToAttributeValue(this.consumeCharRef(">") || "&"); } else if (char === ">") { return this.emitToken(); } else { this.addToAttributeValue(char); } }, afterAttributeValueQuoted: function(char) { if (isSpace(char)) { this.state = 'beforeAttributeName'; } else if (char === "/") { this.state = 'selfClosingStartTag'; } else if (char === ">") { return this.emitToken(); } else { this.char--; this.state = 'beforeAttributeName'; } }, selfClosingStartTag: function(char) { if (char === ">") { this.selfClosing(); return this.emitToken(); } else { this.char--; this.state = 'beforeAttributeName'; } }, endTagOpen: function(char) { if (isAlpha(char)) { this.tag(EndTag, char.toLowerCase()); } } } }; function Tag(tagName, attributes, options) { this.tagName = tagName || ""; this.attributes = attributes || []; this.selfClosing = options ? options.selfClosing : false; } Tag.prototype = { constructor: Tag, addToTagName: function(char) { this.tagName += char; }, startAttribute: function(char) { this.currentAttribute = [char.toLowerCase(), null]; this.attributes.push(this.currentAttribute); }, addToAttributeName: function(char) { this.currentAttribute[0] += char; }, addToAttributeValue: function(char) { this.currentAttribute[1] = this.currentAttribute[1] || ""; this.currentAttribute[1] += char; }, finalize: function() { delete this.currentAttribute; return this; } }; function StartTag() { Tag.apply(this, arguments); } StartTag.prototype = objectCreate(Tag.prototype); StartTag.prototype.type = 'StartTag'; StartTag.prototype.constructor = StartTag; StartTag.prototype.toHTML = function() { return config.generateTag(this); }; function generateTag(tag) { var out = "<"; out += tag.tagName; if (tag.attributes.length) { out += " " + config.generateAttributes(tag.attributes); } out += ">"; return out; } function generateAttributes(attributes) { var out = [], attribute, attrString, value; for (var i=0, l=attributes.length; i"; } }; function tokenize(input) { var tokenizer = new Tokenizer(input); return tokenizer.tokenize(); } function generate(tokens) { var output = ""; for (var i=0, l=tokens.length; i