The version of vichan running on lainchan.org
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

1741 行
64KB

  1. /**
  2. * @license
  3. * Copyright (C) 2006 Google Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /**
  18. * @fileoverview
  19. * some functions for browser-side pretty printing of code contained in html.
  20. *
  21. * <p>
  22. * For a fairly comprehensive set of languages see the
  23. * <a href="https://github.com/google/code-prettify#for-which-languages-does-it-work">README</a>
  24. * file that came with this source. At a minimum, the lexer should work on a
  25. * number of languages including C and friends, Java, Python, Bash, SQL, HTML,
  26. * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk
  27. * and a subset of Perl, but, because of commenting conventions, doesn't work on
  28. * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class.
  29. * <p>
  30. * Usage: <ol>
  31. * <li> include this source file in an html page via
  32. * {@code <script type="text/javascript" src="/path/to/prettify.js"></script>}
  33. * <li> define style rules. See the example page for examples.
  34. * <li> mark the {@code <pre>} and {@code <code>} tags in your source with
  35. * {@code class=prettyprint.}
  36. * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty
  37. * printer needs to do more substantial DOM manipulations to support that, so
  38. * some css styles may not be preserved.
  39. * </ol>
  40. * That's it. I wanted to keep the API as simple as possible, so there's no
  41. * need to specify which language the code is in, but if you wish, you can add
  42. * another class to the {@code <pre>} or {@code <code>} element to specify the
  43. * language, as in {@code <pre class="prettyprint lang-java">}. Any class that
  44. * starts with "lang-" followed by a file extension, specifies the file type.
  45. * See the "lang-*.js" files in this directory for code that implements
  46. * per-language file handlers.
  47. * <p>
  48. * Change log:<br>
  49. * cbeust, 2006/08/22
  50. * <blockquote>
  51. * Java annotations (start with "@") are now captured as literals ("lit")
  52. * </blockquote>
  53. * @requires console
  54. */
  55. // JSLint declarations
  56. /*global console, document, navigator, setTimeout, window, define */
  57. /**
  58. * {@type !{
  59. * 'createSimpleLexer': function (Array, Array): (function (JobT)),
  60. * 'registerLangHandler': function (function (JobT), Array.<string>),
  61. * 'PR_ATTRIB_NAME': string,
  63. * 'PR_ATTRIB_VALUE': string,
  64. * 'PR_COMMENT': string,
  65. * 'PR_DECLARATION': string,
  66. * 'PR_KEYWORD': string,
  67. * 'PR_LITERAL': string,
  68. * 'PR_NOCODE': string,
  69. * 'PR_PLAIN': string,
  70. * 'PR_PUNCTUATION': string,
  71. * 'PR_SOURCE': string,
  72. * 'PR_STRING': string,
  73. * 'PR_TAG': string,
  74. * 'PR_TYPE': string,
  75. * 'prettyPrintOne': function (string, string, number|boolean),
  76. * 'prettyPrint': function (?function, ?(HTMLElement|HTMLDocument))
  77. * }}
  78. * @const
  79. */
  /**
   * @typedef {!Array.<number|string>}
   * Alternating indices and the decorations that should be inserted there.
   * The indices are monotonically increasing.
   */
  var DecorationsT;

  /**
   * Description of one unit of pretty-printing work.
   *
   * @typedef {!{
   *   sourceNode: !Element,
   *   pre: !(number|boolean),
   *   langExtension: ?string,
   *   numberLines: ?(number|boolean),
   *   sourceCode: ?string,
   *   spans: ?(Array.<number|Node>),
   *   basePos: ?number,
   *   decorations: ?DecorationsT
   * }}
   * <dl>
   *  <dt>sourceNode<dd>the element containing the source
   *  <dt>sourceCode<dd>source as plain text
   *  <dt>pre<dd>truthy if white-space in text nodes
   *     should be considered significant.
   *  <dt>spans<dd> alternating span start indices into source
   *     and the text node or element (e.g. {@code <BR>}) corresponding to that
   *     span.
   *  <dt>decorations<dd>an array of style classes preceded
   *     by the position at which they start in job.sourceCode in order
   *  <dt>basePos<dd>integer position of this.sourceCode in the larger chunk of
   *     source.
   * </dl>
   */
  var JobT;

  /**
   * Plain-text source together with the DOM nodes it was extracted from.
   *
   * @typedef {!{
   *   sourceCode: string,
   *   spans: !(Array.<number|Node>)
   * }}
   * <dl>
   *  <dt>sourceCode<dd>source as plain text
   *  <dt>spans<dd> alternating span start indices into source
   *     and the text node or element (e.g. {@code <BR>}) corresponding to that
   *     span.
   * </dl>
   */
  var SourceSpansT;

  /**
   * Compiler-overridable flag; defaults to {@code false}.
   * NOTE(review): the code that reads this flag is outside this part of the
   * file -- confirm its effect before changing the default.
   * @define {boolean}
   */
  var IN_GLOBAL_SCOPE = false;

  // Declared without a value here. NOTE(review): presumably the exported API
  // object whose shape is described by the {@type ...} record comment earlier
  // in this file; the assignment is not visible in this chunk -- confirm.
  var PR;

  /**
   * Split {@code prettyPrint} into multiple timeouts so as not to interfere with
   * UI events.
   * If set to {@code false}, {@code prettyPrint()} is synchronous.
   */
  window['PR_SHOULD_USE_CONTINUATION'] = true;

  /**
   * Pretty print a chunk of code.
   * @param {string} sourceCodeHtml The HTML to pretty print.
   * @param {string} opt_langExtension The language name to use.
   *     Typically, a filename extension like 'cpp' or 'java'.
   * @param {number|boolean} opt_numberLines True to number lines,
   *     or the 1-indexed number of the first line in sourceCodeHtml.
   * @return {string} code as html, but prettier
   */
  var prettyPrintOne;

  /**
   * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
   * {@code class=prettyprint} and prettify them.
   *
   * @param {Function} opt_whenDone called when prettifying is done.
   * @param {HTMLElement|HTMLDocument} opt_root an element or document
   *     containing all the elements to pretty print.
   *     Defaults to {@code document.body}.
   */
  var prettyPrint;
  154. (function () {
  // Local alias for the global window object.
  var win = window;

  // Keyword lists for various languages.
  // We use things that coerce to strings to make them compact when minified
  // and to defeat aggressive optimizers that fold large string constants.
  //
  // Most lists are arrays whose first element is the list they extend and
  // whose second element is a comma-separated string of additional keywords.
  // Coercing such a nested array to a string joins every level with commas,
  // yielding one flat comma-separated keyword string.
  var FLOW_CONTROL_KEYWORDS = ["break,continue,do,else,for,if,return,while"];
  // Extends FLOW_CONTROL_KEYWORDS.
  var C_KEYWORDS = [FLOW_CONTROL_KEYWORDS,"auto,case,char,const,default," +
      "double,enum,extern,float,goto,inline,int,long,register,short,signed," +
      "sizeof,static,struct,switch,typedef,union,unsigned,void,volatile"];
  // Extends C_KEYWORDS; base list for the C++, Java, C# and JavaScript lists.
  var COMMON_KEYWORDS = [C_KEYWORDS,"catch,class,delete,false,import," +
      "new,operator,private,protected,public,this,throw,true,try,typeof"];
  var CPP_KEYWORDS = [COMMON_KEYWORDS,"alignof,align_union,asm,axiom,bool," +
      "concept,concept_map,const_cast,constexpr,decltype,delegate," +
      "dynamic_cast,explicit,export,friend,generic,late_check," +
      "mutable,namespace,nullptr,property,reinterpret_cast,static_assert," +
      "static_cast,template,typeid,typename,using,virtual,where"];
  var JAVA_KEYWORDS = [COMMON_KEYWORDS,
      "abstract,assert,boolean,byte,extends,finally,final,implements,import," +
      "instanceof,interface,null,native,package,strictfp,super,synchronized," +
      "throws,transient"];
  var CSHARP_KEYWORDS = [COMMON_KEYWORDS,
      "abstract,as,base,bool,by,byte,checked,decimal,delegate,descending," +
      "dynamic,event,finally,fixed,foreach,from,group,implicit,in,interface," +
      "internal,into,is,let,lock,null,object,out,override,orderby,params," +
      "partial,readonly,ref,sbyte,sealed,stackalloc,string,select,uint,ulong," +
      "unchecked,unsafe,ushort,var,virtual,where"];
  // A plain string (extends nothing). Not included in ALL_KEYWORDS below.
  var COFFEE_KEYWORDS = "all,and,by,catch,class,else,extends,false,finally," +
      "for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then," +
      "throw,true,try,unless,until,when,while,yes";
  var JSCRIPT_KEYWORDS = [COMMON_KEYWORDS,
      "abstract,async,await,constructor,debugger,enum,eval,export,function," +
      "get,implements,instanceof,interface,let,null,set,undefined,var,with," +
      "yield,Infinity,NaN"];
  // A plain string (extends nothing).
  var PERL_KEYWORDS = "caller,delete,die,do,dump,elsif,eval,exit,foreach,for," +
      "goto,if,import,last,local,my,next,no,our,print,package,redo,require," +
      "sub,undef,unless,until,use,wantarray,while,BEGIN,END";
  var PYTHON_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "and,as,assert,class,def,del," +
      "elif,except,exec,finally,from,global,import,in,is,lambda," +
      "nonlocal,not,or,pass,print,raise,try,with,yield," +
      "False,True,None"];
  var RUBY_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "alias,and,begin,case,class," +
      "def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo," +
      "rescue,retry,self,super,then,true,undef,unless,until,when,yield," +
      "BEGIN,END"];
  var SH_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "case,done,elif,esac,eval,fi," +
      "function,in,local,set,then,until"];
  // Union of the per-language lists above (shared base lists appear once per
  // language that extends them, so the coerced string contains duplicates).
  var ALL_KEYWORDS = [
      CPP_KEYWORDS, CSHARP_KEYWORDS, JAVA_KEYWORDS, JSCRIPT_KEYWORDS,
      PERL_KEYWORDS, PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS];
  // Matches, at the start of the input, one of a fixed set of C/C++ library
  // type names (e.g. FILE, vector, const_iterator, multimap, uint32) followed
  // by a word boundary.
  var C_TYPES = /^(DIR|FILE|vector|(de|priority_)?queue|list|stack|(const_)?iterator|(multi)?(set|map)|bitset|u?(int|float)\d*)\b/;
  // Token style names. Each value corresponds to a CSS class name.

  /**
   * Token style for a string literal.
   * @const
   */
  var PR_STRING = 'str';
  /**
   * Token style for a keyword.
   * @const
   */
  var PR_KEYWORD = 'kwd';
  /**
   * Token style for a comment.
   * @const
   */
  var PR_COMMENT = 'com';
  /**
   * Token style for a type.
   * @const
   */
  var PR_TYPE = 'typ';
  /**
   * Token style for a literal value, e.g. 1, null, true.
   * @const
   */
  var PR_LITERAL = 'lit';
  /**
   * Token style for a punctuation string.
   * @const
   */
  var PR_PUNCTUATION = 'pun';
  /**
   * Token style for plain text.
   * @const
   */
  var PR_PLAIN = 'pln';

  /**
   * Token style for an sgml tag.
   * @const
   */
  var PR_TAG = 'tag';
  /**
   * Token style for a markup declaration such as a DOCTYPE.
   * @const
   */
  var PR_DECLARATION = 'dec';
  /**
   * Token style for embedded source.
   * @const
   */
  var PR_SOURCE = 'src';
  /**
   * Token style for an sgml attribute name.
   * @const
   */
  var PR_ATTRIB_NAME = 'atn';
  /**
   * Token style for an sgml attribute value.
   * @const
   */
  var PR_ATTRIB_VALUE = 'atv';

  /**
   * A class that indicates a section of markup that is not code, e.g. to allow
   * embedding of line numbers within code listings.
   * @const
   */
  var PR_NOCODE = 'nocode';
  /**
   * A set of tokens that can precede a regular expression literal in
   * javascript
   * http://web.archive.org/web/20070717142515/http://www.mozilla.org/js/language/js20/rationale/syntax.html
   * has the full list, but I've removed ones that might be problematic when
   * seen in languages that don't support regular expression literals.
   *
   * <p>Specifically, I've removed any keywords that can't precede a regexp
   * literal in a syntactically legal javascript program, and I've removed the
   * "in" keyword since it's not a keyword in many languages, and might be used
   * as a count of inches.
   *
   * <p>The link above does not accurately describe EcmaScript rules since
   * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
   * very well in practice.
   *
   * <p>The value is regular-expression source text (a string, not a RegExp):
   * one non-capturing alternation of the preceding tokens followed by
   * optional white-space. The leading {@code ^^} is the doubled-caret form
   * that {@code combinePrefixPatterns} keeps as a real start-of-input anchor.
   *
   * @private
   * @const
   */
  var REGEXP_PRECEDER_PATTERN = '(?:^^\\.?|[+-]|[!=]=?=?|\\#|%=?|&&?=?|\\(|\\*=?|[+\\-]=|->|\\/=?|::?|<<?=?|>>?>?=?|,|;|\\?|@|\\[|~|{|\\^\\^?=?|\\|\\|?=?|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*';

  // CAVEAT: this does not properly handle the case where a regular
  // expression immediately follows another since a regular expression may
  // have flags for case-sensitivity and the like.  Having regexp tokens
  // adjacent is not valid in any language I'm aware of, so I'm punting.
  // TODO: maybe style special characters inside a regexp as punctuation.
  /**
   * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
   * matches the union of the sets of strings matched by the input RegExp.
   * Since it matches globally, if the input strings have a start-of-input
   * anchor (/^.../), it is ignored for the purposes of unioning.
   *
   * <p>Each input is rewritten before being joined with {@code |}:
   * <ul>
   * <li>single {@code ^} anchors are dropped ({@code ^^} is kept as a real
   *     anchor),
   * <li>capturing groups that are never back-referenced become non-capturing,
   *     and the remaining groups and their back-references are renumbered so
   *     they stay consistent inside the union,
   * <li>numeric escapes that are not back-references are replaced by
   *     {@code \xHH} escapes so they cannot be mistaken for back-references to
   *     groups of an earlier input,
   * <li>when case-sensitive and case-insensitive inputs are mixed, letters in
   *     the case-insensitive inputs are expanded to explicit {@code [Aa]} sets.
   * </ul>
   *
   * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
   * @return {RegExp} a global regex.
   * @throws {Error} if any input regex is global or multiline.
   */
  function combinePrefixPatterns(regexs) {
    // Count of capturing groups emitted so far across all inputs.
    var capturedGroupIndex = 0;

    // The combined regex can carry the 'i' flag only if every input that
    // contains letters is itself case-insensitive; otherwise the
    // case-insensitive inputs must have their letters expanded by hand.
    var needToFoldCase = false;
    var ignoreCase = false;
    for (var i = 0, n = regexs.length; i < n; ++i) {
      var regex = regexs[i];
      if (regex.ignoreCase) {
        ignoreCase = true;
      } else if (/[a-z]/i.test(regex.source.replace(
                     /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
        needToFoldCase = true;
        ignoreCase = false;
        break;
      }
    }

    var escapeCharToCodeUnit = {
      'b': 8,
      't': 9,
      'n': 0xa,
      'v': 0xb,
      'f': 0xc,
      'r': 0xd
    };

    /** Returns the code unit denoted by one element of a character set. */
    function decodeEscape(charsetPart) {
      var cc0 = charsetPart.charCodeAt(0);
      if (cc0 !== 92 /* \\ */) {
        return cc0;
      }
      var c1 = charsetPart.charAt(1);
      cc0 = escapeCharToCodeUnit[c1];
      if (cc0) {
        return cc0;
      } else if ('0' <= c1 && c1 <= '7') {
        return parseInt(charsetPart.substring(1), 8);
      } else if (c1 === 'u' || c1 === 'x') {
        return parseInt(charsetPart.substring(2), 16);
      } else {
        return charsetPart.charCodeAt(1);
      }
    }

    /** Returns a two-digit {@code \xHH} escape; charCode must be <= 0xff. */
    function encodeHexEscape(charCode) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }

    /** Encodes a code unit so that it is safe inside a character set. */
    function encodeEscape(charCode) {
      if (charCode < 0x20) {
        return encodeHexEscape(charCode);
      }
      var ch = String.fromCharCode(charCode);
      return (ch === '\\' || ch === '-' || ch === ']' || ch === '^')
          ? "\\" + ch : ch;
    }

    /**
     * Rewrites the digits of a numeric escape that is not a back-reference
     * as hex escapes.  The longest legacy octal prefix (at most three digits,
     * value at most 0377) denotes one character; every remaining digit,
     * including 8 and 9, matches itself.  Hex escapes are used for everything
     * so that the result can never merge with a neighbouring back-reference
     * or introduce a regex metacharacter.
     */
    function rewriteLegacyOctalEscape(digits) {
      var octal = /^(?:[0-3][0-7]{0,2}|[4-7][0-7]?)/.exec(digits);
      var rewritten = '';
      var consumed = 0;
      if (octal) {
        rewritten = encodeHexEscape(parseInt(octal[0], 8));
        consumed = octal[0].length;
      }
      for (var j = consumed; j < digits.length; ++j) {
        rewritten += encodeHexEscape(digits.charCodeAt(j));
      }
      return rewritten;
    }

    /**
     * Expands a character set such as {@code [a-c]} so that it also contains
     * the other-case versions of any ASCII letters in it.
     */
    function caseFoldCharset(charSet) {
      var charsetParts = charSet.substring(1, charSet.length - 1).match(
          new RegExp(
              '\\\\u[0-9A-Fa-f]{4}'
              + '|\\\\x[0-9A-Fa-f]{2}'
              + '|\\\\[0-3][0-7]{0,2}'
              + '|\\\\[0-7]{1,2}'
              + '|\\\\[\\s\\S]'
              + '|-'
              + '|[^-\\\\]',
              'g'));
      var ranges = [];
      var inverse = charsetParts[0] === '^';

      var out = ['['];
      if (inverse) { out.push('^'); }

      for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
        var p = charsetParts[i];
        if (/\\[bdsw]/i.test(p)) {  // Don't muck with named groups.
          out.push(p);
        } else {
          var start = decodeEscape(p);
          var end;
          if (i + 2 < n && '-' === charsetParts[i + 1]) {
            end = decodeEscape(charsetParts[i + 2]);
            i += 2;
          } else {
            end = start;
          }
          ranges.push([start, end]);
          // If the range might intersect letters, then expand it.
          // This case handling is too simplistic.
          // It does not deal with non-latin case folding.
          // It works for latin source code identifiers though.
          if (!(end < 65 || start > 122)) {
            if (!(end < 65 || start > 90)) {
              ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
            }
            if (!(end < 97 || start > 122)) {
              ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
            }
          }
        }
      }

      // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
      // -> [[1, 12], [14, 14], [16, 17]]
      ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
      var consolidatedRanges = [];
      var lastRange = [];
      for (var i = 0; i < ranges.length; ++i) {
        var range = ranges[i];
        if (range[0] <= lastRange[1] + 1) {
          lastRange[1] = Math.max(lastRange[1], range[1]);
        } else {
          consolidatedRanges.push(lastRange = range);
        }
      }

      for (var i = 0; i < consolidatedRanges.length; ++i) {
        var range = consolidatedRanges[i];
        out.push(encodeEscape(range[0]));
        if (range[1] > range[0]) {
          // Any multi-character range is written as "start-end".
          out.push('-');
          out.push(encodeEscape(range[1]));
        }
      }
      out.push(']');
      return out.join('');
    }

    /** Rewrites the source of one input regex as described above. */
    function allowAnywhereFoldCaseAndRenumberGroups(regex) {
      // Split into character sets, escape sequences, punctuation strings
      // like ('(', '(?:', ')', '^'), and runs of characters that do not
      // include any of the above.
      var parts = regex.source.match(
          new RegExp(
              '(?:'
              + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
              + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
              + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
              + '|\\\\[0-9]+'  // a back-reference or octal escape
              + '|\\\\[^ux0-9]'  // other escape sequence
              + '|\\(\\?[:!=]'  // start of a non-capturing group
              + '|[\\(\\)\\^]'  // start/end of a group, or line start
              + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
              + ')',
              'g'));
      var n = parts.length;

      // Maps captured group numbers to the number they will occupy in
      // the output or to -1 if that has not been determined, or to
      // undefined if they need not be capturing in the output.
      var capturedGroups = [];

      // Walk over and identify back references to build the capturedGroups
      // mapping.
      for (var i = 0, groupIndex = 0; i < n; ++i) {
        var p = parts[i];
        if (p === '(') {
          // groups are 1-indexed, so max group index is count of '('
          ++groupIndex;
        } else if ('\\' === p.charAt(0)) {
          var digits = p.substring(1);
          // Non-zero only for an all-digit escape such as \1 or \101.
          var decimalValue = +digits;
          if (decimalValue) {
            if (decimalValue <= groupIndex) {
              capturedGroups[decimalValue] = -1;
            } else {
              // Replace with an unambiguous escape sequence so that
              // an octal escape sequence does not turn into a backreference
              // to a capturing group from an earlier regex.
              parts[i] = rewriteLegacyOctalEscape(digits);
            }
          }
        }
      }

      // Renumber groups and reduce capturing groups to non-capturing groups
      // where possible.
      for (var i = 1; i < capturedGroups.length; ++i) {
        if (-1 === capturedGroups[i]) {
          capturedGroups[i] = ++capturedGroupIndex;
        }
      }
      for (var i = 0, groupIndex = 0; i < n; ++i) {
        var p = parts[i];
        if (p === '(') {
          ++groupIndex;
          if (!capturedGroups[groupIndex]) {
            parts[i] = '(?:';
          }
        } else if ('\\' === p.charAt(0)) {
          var decimalValue = +p.substring(1);
          if (decimalValue && decimalValue <= groupIndex) {
            parts[i] = '\\' + capturedGroups[decimalValue];
          }
        }
      }

      // Remove any prefix anchors so that the output will match anywhere.
      // ^^ really does mean an anchored match though.
      for (var i = 0; i < n; ++i) {
        if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
      }

      // Expand letters to groups to handle mixing of case-sensitive and
      // case-insensitive patterns if necessary.
      if (regex.ignoreCase && needToFoldCase) {
        for (var i = 0; i < n; ++i) {
          var p = parts[i];
          var ch0 = p.charAt(0);
          if (p.length >= 2 && ch0 === '[') {
            parts[i] = caseFoldCharset(p);
          } else if (ch0 !== '\\') {
            // TODO: handle letters in numeric escapes.
            parts[i] = p.replace(
                /[a-zA-Z]/g,
                function (ch) {
                  var cc = ch.charCodeAt(0);
                  return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
                });
          }
        }
      }

      return parts.join('');
    }

    var rewritten = [];
    for (var i = 0, n = regexs.length; i < n; ++i) {
      var regex = regexs[i];
      if (regex.global || regex.multiline) { throw new Error('' + regex); }
      rewritten.push(
          '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
    }

    return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
  }
  /**
   * Flattens a DOM subtree into a plain-text source string plus a table that
   * maps ranges of that string back to the nodes they came from.
   *
   * <p>
   * For the DOM structure</p>
   * <pre>
   * (Element   "p"
   *   (Element "b"
   *     (Text  "print "))       ; #1
   *   (Text    "'Hello '")      ; #2
   *   (Element "br")            ; #3
   *   (Text    "  + 'World';")) ; #4
   * </pre>
   * <p>
   * which corresponds to the HTML
   * {@code <p><b>print </b>'Hello '<br>  + 'World';</p>},
   * the result is</p>
   * <pre>
   * {
   *   sourceCode: "print 'Hello '\n  + 'World';",
   *   //                     1          2
   *   //           012345678901234 5678901234567
   *   spans: [0, #1, 6, #2, 14, #3, 15, #4]
   * }
   * </pre>
   * <p>
   * where #1 refers to the {@code "print "} text node, and so on.
   * </p>
   *
   * <p>
   * The {@code spans} array holds pairs: even elements are the start indices
   * of substrings, odd elements are the text nodes (or BR/LI elements) that
   * supplied those substrings.  A substring runs until the next start index
   * or the end of the source.  Elements whose class list contains
   * {@code nocode} are skipped together with everything inside them, and a
   * single trailing newline is removed from {@code sourceCode}.
   * </p>
   *
   * @param {Node} node an HTML DOM subtree containing source-code.
   * @param {boolean|number} isPreformatted truthy if white-space in
   *    text nodes should be considered significant.
   * @return {SourceSpansT} source code and the nodes in which they occur.
   */
  function extractSourceSpans(node, isPreformatted) {
    var nocodeClass = /(?:^|\s)nocode(?:\s|$)/;

    var pieces = [];
    var spans = [];
    var offset = 0;

    // Appends one piece of text and remembers which node produced it.
    function record(piece, owner) {
      pieces.push(piece);
      spans.push(offset, owner);
      offset += piece.length;
    }

    function visit(current) {
      var kind = current.nodeType;

      if (kind == 1) {  // Element
        if (nocodeClass.test(current.className)) { return; }
        var child = current.firstChild;
        while (child) {
          visit(child);
          child = child.nextSibling;
        }
        var tag = current.nodeName.toLowerCase();
        if (tag === 'br' || tag === 'li') {
          // Line-breaking elements contribute a newline after their content.
          record('\n', current);
        }
        return;
      }

      if (kind == 3 || kind == 4) {  // Text or CDATA section
        var raw = current.nodeValue;
        if (!raw.length) { return; }
        // TODO: handle tabs here?
        record(
            isPreformatted
                ? raw.replace(/\r\n?/g, '\n')  // Normalize newlines.
                : raw.replace(/[ \t\r\n]+/g, ' '),
            current);
      }
    }

    visit(node);

    return {
      sourceCode: pieces.join('').replace(/\n$/, ''),
      spans: spans
    };
  }
  /**
   * Apply the given language handler to sourceCode and add the resulting
   * decorations to out.
   *
   * <p>Does nothing when sourceCode is empty.  Otherwise builds a job with
   * {@code pre: 1} and the given node, position and source, runs langHandler
   * on it, and appends whatever the handler left in {@code job.decorations}
   * to out.  A handler that leaves {@code job.decorations} unset appends
   * nothing.
   *
   * @param {!Element} sourceNode
   * @param {number} basePos the index of sourceCode within the chunk of source
   *    whose decorations are already present on out.
   * @param {string} sourceCode
   * @param {function(JobT)} langHandler
   * @param {DecorationsT} out
   */
  function appendDecorations(
      sourceNode, basePos, sourceCode, langHandler, out) {
    if (!sourceCode) { return; }
    /** @type {JobT} */
    var job = {
      sourceNode: sourceNode,
      pre: 1,
      langExtension: null,
      numberLines: null,
      sourceCode: sourceCode,
      spans: null,
      basePos: basePos,
      decorations: null
    };
    langHandler(job);
    var produced = job.decorations;
    if (!produced) { return; }
    // Append element by element: spreading the whole list into one push()
    // call via apply() throws a RangeError once the list is longer than the
    // engine's argument limit, which large embedded blocks can exceed.
    for (var i = 0, n = produced.length; i < n; ++i) {
      out.push(produced[i]);
    }
  }
  // Matches any string containing at least one non-white-space character.
  var notWs = /\S/;

  /**
   * Finds the single element that wraps all meaningful content of
   * {@code element}, if there is one.
   * <p>
   * The result is the sole child element when {@code element} has exactly
   * one child element and none of its direct text nodes contain a
   * non-space character; in every other case the result is undefined.
   * <p>
   * This is meant to return the CODE element in {@code <pre><code ...>} when
   * there is a single child element that contains all the non-space textual
   * content, but not to return anything where there are multiple child elements
   * as in {@code <pre><code>...</code><code>...</code></pre>} or when there
   * is textual content.
   */
  function childContentWrapper(element) {
    var soleChild;
    var node = element.firstChild;
    while (node) {
      var kind = node.nodeType;
      if (kind === 1) {  // Element Node
        // A second child element means there is no unique wrapper.
        if (soleChild) { return undefined; }
        soleChild = node;
      } else if (kind === 3 && notWs.test(node.nodeValue)) {  // Text Node
        // Real text directly inside the element rules a wrapper out.
        return undefined;
      }
      node = node.nextSibling;
    }
    return soleChild;
  }
  /**
   * Given triples of [style, pattern, context] returns a lexing function.
   * The lexing function interprets the patterns to find token boundaries and
   * returns a decoration list of the form
   * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
   * where index_n is an index into the sourceCode, and style_n is a style
   * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
   * all characters in sourceCode[index_n-1:index_n].
   *
   * The stylePatterns is a list whose elements have the form
   * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
   *
   * Style is a style constant like PR_PLAIN, or can be a string of the
   * form 'lang-FOO', where FOO is a language extension describing the
   * language of the portion of the token in $1 after pattern executes.
   * E.g., if style is 'lang-lisp', and group 1 contains the text
   * '(hello (world))', then that portion of the token will be passed to the
   * registered lisp handler for formatting.
   * The text before and after group 1 will be restyled using this decorator
   * so decorators should take care that this doesn't result in infinite
   * recursion.  For example, the HTML lexer rule for SCRIPT elements looks
   * something like ['lang-js', /<[s]cript>(.+?)<\/script>/].  This may match
   * '<script>foo()<\/script>', which would cause the current decorator to
   * be called with '<script>' which would not match the same rule since
   * group 1 must not be empty, so it would be instead styled as PR_TAG by
   * the generic tag rule.  The handler registered for the 'js' extension would
   * then be called with 'foo()', and finally, the current decorator would
   * be called with '<\/script>' which would not match the original rule and
   * so the generic tag rule would identify it as a tag.
   *
   * Pattern must only match prefixes, and if it matches a prefix, then that
   * match is considered a token with the same style.
   *
   * Context is applied to the last non-whitespace, non-comment token
   * recognized.
   *
   * Shortcut is an optional string of characters, any of which, if the first
   * character, guarantee that this pattern and only this pattern matches.
   *
   * @param {Array} shortcutStylePatterns patterns that always start with
   *   a known character.  Must have a shortcut string.
   * @param {Array} fallthroughStylePatterns patterns that will be tried in
   *   order if the shortcut ones fail.  May have shortcuts.
   *
   * @return {function (JobT)} a function that takes an undecorated job and
   *   attaches a list of decorations.
   */
  function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
    // Maps a token's first character to the one pattern entry it selects.
    var shortcuts = {};
    var tokenizer = buildTokenizer();
    var nPatterns = fallthroughStylePatterns.length;

    // Fills in the shortcut table and combines every distinct pattern (plus
    // a catch-all for any single character) into one global tokenizer regex.
    function buildTokenizer() {
      var everyPattern = shortcutStylePatterns.concat(fallthroughStylePatterns);
      var distinctRegexs = [];
      var seenKeys = {};
      for (var idx = 0; idx < everyPattern.length; ++idx) {
        var entry = everyPattern[idx];
        var firstChars = entry[3];
        if (firstChars) {
          for (var c = firstChars.length - 1; c >= 0; --c) {
            shortcuts[firstChars.charAt(c)] = entry;
          }
        }
        var entryRegex = entry[1];
        var key = '' + entryRegex;
        if (!seenKeys.hasOwnProperty(key)) {
          distinctRegexs.push(entryRegex);
          seenKeys[key] = null;
        }
      }
      distinctRegexs.push(/[\0-\uffff]/);
      return combinePrefixPatterns(distinctRegexs);
    }

    // Determines the style for a token that is not in the style cache, and
    // the match object of the pattern that selected it (falsy if none did).
    function lookUpStyle(token) {
      var viaShortcut = shortcuts[token.charAt(0)];
      if (viaShortcut) {
        return { match: token.match(viaShortcut[1]), style: viaShortcut[0] };
      }
      for (var p = 0; p < nPatterns; ++p) {
        var candidate = fallthroughStylePatterns[p];
        var hit = token.match(candidate[1]);
        if (hit) {
          return { match: hit, style: candidate[0] };
        }
      }
      // make sure that we make progress
      return { match: null, style: PR_PLAIN };
    }

    /**
     * Lexes job.sourceCode and attaches an output array job.decorations of
     * style classes preceded by the position at which they start in
     * job.sourceCode in order.
     *
     * @type{function (JobT)}
     */
    var decorate = function (job) {
      var sourceCode = job.sourceCode;
      var basePos = job.basePos;
      var sourceNode = job.sourceNode;
      /** Even entries are positions in source in ascending order.  Odd entries
        * are style markers (e.g., PR_COMMENT) that run from that position until
        * the end.
        * @type {DecorationsT}
        */
      var decorations = [basePos, PR_PLAIN];
      var pos = 0;  // index into sourceCode
      var tokens = sourceCode.match(tokenizer) || [];
      // Remembers the style of non-embedded tokens that were already seen.
      var styleCache = {};

      for (var t = 0; t < tokens.length; ++t) {
        var token = tokens[t];
        var tokenStart = pos;
        pos += token.length;

        var style = styleCache[token];
        if (typeof style === 'string') {
          decorations.push(basePos + tokenStart, style);
          continue;
        }

        var found = lookUpStyle(token);
        var match = found.match;
        style = found.style;

        var isEmbedded = style.length >= 5 && style.substring(0, 5) === 'lang-';
        if (isEmbedded && !(match && typeof match[1] === 'string')) {
          // A 'lang-' rule without a usable group 1 is plain embedded source.
          isEmbedded = false;
          style = PR_SOURCE;
        }

        if (!isEmbedded) {
          styleCache[token] = style;
          decorations.push(basePos + tokenStart, style);
          continue;
        }

        // Treat group 1 as an embedded block of source code.
        var embeddedSource = match[1];
        var embeddedSourceStart = token.indexOf(embeddedSource);
        var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
        if (match[2]) {
          // If embeddedSource can be blank, then it would match at the
          // beginning which would cause us to infinitely recurse on the
          // entire token, so we catch the right context in match[2].
          embeddedSourceEnd = token.length - match[2].length;
          embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
        }
        var lang = style.substring(5);
        var tokenBase = basePos + tokenStart;

        // Decorate the left of the embedded source
        appendDecorations(
            sourceNode,
            tokenBase,
            token.substring(0, embeddedSourceStart),
            decorate,
            decorations);
        // Decorate the embedded source
        appendDecorations(
            sourceNode,
            tokenBase + embeddedSourceStart,
            embeddedSource,
            langHandlerForExtension(lang, embeddedSource),
            decorations);
        // Decorate the right of the embedded section
        appendDecorations(
            sourceNode,
            tokenBase + embeddedSourceEnd,
            token.substring(embeddedSourceEnd),
            decorate,
            decorations);
      }
      job.decorations = decorations;
    };
    return decorate;
  }
  /**
   * Returns a function that produces a list of decorations from source text.
   *
   * This code treats ", ', and ` as string delimiters, and \ as a string
   * escape.  It does not recognize perl's qq() style strings.
   * It has no special handling for double delimiter escapes as in basic, or
   * the tripled delimiters used in python, but should work on those regardless
   * although in those cases a single string literal may be broken up into
   * multiple adjacent string literals.
   *
   * It recognizes C, C++, and shell style comments.
   *
   * @param {Object} options a set of optional parameters.  The keys read are
   *     'tripleQuotedStrings', 'multiLineStrings', 'verbatimStrings',
   *     'hashComments' (a value greater than 1 enables ### block comments),
   *     'cStyleComments', 'regexLiterals' (a value greater than 1 lets a
   *     regex literal span lines), 'types' (used as the PR_TYPE pattern) and
   *     'keywords' (a string of names separated by whitespace or commas).
   * @return {function (JobT)} a function that examines the source code
   *     in the input job and builds a decoration list which it attaches to
   *     the job.
   */
  function sourceDecorator(options) {
    var shortcutStylePatterns = [], fallthroughStylePatterns = [];
    if (options['tripleQuotedStrings']) {
      // '''multi-line-string''', 'single-line-string', and double-quoted
      shortcutStylePatterns.push(
          [PR_STRING, /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
           null, '\'"']);
    } else if (options['multiLineStrings']) {
      // 'multi-line-string', "multi-line-string"
      shortcutStylePatterns.push(
          [PR_STRING, /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
           null, '\'"`']);
    } else {
      // 'single-line-string', "single-line-string"
      shortcutStylePatterns.push(
          [PR_STRING,
           /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
           null, '"\'']);
    }
    if (options['verbatimStrings']) {
      // verbatim-string-literal production from the C# grammar.  See issue 93.
      fallthroughStylePatterns.push(
          [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
    }
    var hc = options['hashComments'];
    if (hc) {
      if (options['cStyleComments']) {
        if (hc > 1) {  // multiline hash comments
          shortcutStylePatterns.push(
              [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']);
        } else {
          // Stop C preprocessor declarations at an unclosed open comment
          shortcutStylePatterns.push(
              [PR_COMMENT, /^#(?:(?:define|e(?:l|nd)if|else|error|ifn?def|include|line|pragma|undef|warning)\b|[^\r\n]*)/,
               null, '#']);
        }
        // #include <stdio.h>
        fallthroughStylePatterns.push(
            [PR_STRING,
             /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h(?:h|pp|\+\+)?|[a-z]\w*)>/,
             null]);
      } else {
        shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
      }
    }
    if (options['cStyleComments']) {
      fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
      fallthroughStylePatterns.push(
          [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
    }
    var regexLiterals = options['regexLiterals'];
    if (regexLiterals) {
      /**
       * Characters that may not appear inside a regex literal.
       * @const
       */
      var regexExcls = regexLiterals > 1
        ? ''  // Multiline regex literals
        : '\n\r';
      /**
       * Matches any character allowed after a backslash.
       * @const
       */
      var regexAny = regexExcls ? '.' : '[\\S\\s]';
      /**
       * @const
       */
      var REGEX_LITERAL = (
          // A regular expression literal starts with a slash that is
          // not followed by * or / so that it is not confused with
          // comments.
          '/(?=[^/*' + regexExcls + '])'
          // and then contains any number of raw characters,
          + '(?:[^/\\x5B\\x5C' + regexExcls + ']'
          // escape sequences (\x5C),
          +    '|\\x5C' + regexAny
          // or non-nesting character sets (\x5B\x5D);
          +    '|\\x5B(?:[^\\x5C\\x5D' + regexExcls + ']'
          +             '|\\x5C' + regexAny + ')*(?:\\x5D|$))+'
          // finally closed by a /.
          + '/');
      fallthroughStylePatterns.push(
          ['lang-regex',
           RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
           ]);
    }

    var types = options['types'];
    if (types) {
      fallthroughStylePatterns.push([PR_TYPE, types]);
    }

    // A missing 'keywords' option must not be stringified to "undefined",
    // which would turn the identifier `undefined` into a keyword.
    var rawKeywords = options['keywords'];
    var keywords =
        (rawKeywords == null ? '' : '' + rawKeywords).replace(/^ | $/g, '');
    if (keywords.length) {
      fallthroughStylePatterns.push(
          [PR_KEYWORD,
           new RegExp('^(?:' + keywords.replace(/[\s,]+/g, '|') + ')\\b'),
           null]);
    }

    shortcutStylePatterns.push([PR_PLAIN, /^\s+/, null, ' \r\n\t\xA0']);

    var punctuation =
      // The Bash man page says

      // A word is a sequence of characters considered as a single
      // unit by GRUB. Words are separated by metacharacters,
      // which are the following plus space, tab, and newline: { }
      // | & $ ; < >
      // ...

      // A word beginning with # causes that word and all remaining
      // characters on that line to be ignored.

      // which means that only a '#' after /(?:^|[{}|&$;<>\s])/ starts a
      // comment but empirically
      // $ echo {#}
      // {#}
      // $ echo \$#
      // $#
      // $ echo }#
      // }#

      // so /(?:^|[|&;<>\s])/ is more appropriate.

      // http://gcc.gnu.org/onlinedocs/gcc-2.95.3/cpp_1.html#SEC3
      // suggests that this definition is compatible with a
      // default mode that tries to use a single token definition
      // to recognize both bash/python style comments and C
      // preprocessor directives.

      // This definition of punctuation does not include # in the list of
      // follow-on exclusions, so # will not be broken before if preceded
      // by a punctuation character.  We could try to exclude # after
      // [|&;<>] but that doesn't seem to cause many major problems.
      // If that does turn out to be a problem, we should change the below
      // when hc is truthy to include # in the run of punctuation characters
      // only when not following [|&;<>].
      '^.[^\\s\\w.$@\'"`/\\\\]*';
    if (options['regexLiterals']) {
      // The backslashes must be doubled: this is a string that is later
      // compiled by RegExp, and '\s' in a plain string literal is just 's'.
      punctuation += '(?!\\s*\\/)';
    }

    fallthroughStylePatterns.push(
        // TODO(mikesamuel): recognize non-latin letters and numerals in idents
        [PR_LITERAL,     /^@[a-z_$][a-z_$@0-9]*/i, null],
        [PR_TYPE,        /^(?:[@_]?[A-Z]+[a-z][A-Za-z_$@0-9]*|\w+_t\b)/, null],
        [PR_PLAIN,       /^[a-z_$][a-z_$@0-9]*/i, null],
        [PR_LITERAL,
         new RegExp(
             '^(?:'
             // A hex number
             + '0x[a-f0-9]+'
             // or an octal or decimal number, including the leading-dot
             // form such as .5
             + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d+)'
             // possibly in scientific notation
             + '(?:e[+\\-]?\\d+)?'
             + ')'
             // with an optional modifier like UL for unsigned long
             + '[a-z]*', 'i'),
         null, '0123456789'],
        // Don't treat escaped quotes in bash as starting strings.
        // See issue 144.
        [PR_PLAIN,       /^\\[\s\S]?/, null],
        [PR_PUNCTUATION, new RegExp(punctuation), null]);

    return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
  }
  /**
   * Decorator built from the combined keyword list with hash comments,
   * C style comments, multi-line strings and regex literals all switched on.
   */
  var decorateSource = sourceDecorator({
    'regexLiterals': true,
    'multiLineStrings': true,
    'cStyleComments': true,
    'hashComments': true,
    'keywords': ALL_KEYWORDS
  });
  /**
   * Given a DOM subtree, wraps it in a list, and puts each line into its own
   * list item.
   *
   * @param {Node} node modified in place.  Its content is pulled into an
   *     HTMLOListElement, and each line is moved into a separate list item.
   *     This requires cloning elements, so the input might not have unique
   *     IDs after numbering.
   * @param {number|null|boolean} startLineNum
   *     If truthy, coerced to an integer which is the 1-indexed line number
   *     of the first line of code.  The number of the first line will be
   *     attached to the list.
   * @param {boolean} isPreformatted true iff white-space in text nodes should
   *     be treated as significant.
   */
  function numberLines(node, startLineNum, isPreformatted) {
    var nocode = /(?:^|\s)nocode(?:\s|$)/;
    var lineBreak = /\r\n?|\n/;

    var document = node.ownerDocument;

    // Move all of the content into a single list item to start with.
    var li = document.createElement('li');
    while (node.firstChild) {
      li.appendChild(node.firstChild);
    }
    // An array of lines.  We split below, so this is initialized to one
    // un-split line.
    var listItems = [li];

    // Looks for the first line break under the given node and splits there.
    function walk(node) {
      var type = node.nodeType;
      if (type == 1 && !nocode.test(node.className)) {  // Element
        // nodeName is upper case ("BR") in HTML documents and keeps the
        // source case in XML documents, so compare case-insensitively.
        if ('br' === node.nodeName.toLowerCase()) {
          breakAfter(node);
          // Discard the <BR> since it is now flush against a </LI>.
          if (node.parentNode) {
            node.parentNode.removeChild(node);
          }
        } else {
          for (var child = node.firstChild; child; child = child.nextSibling) {
            walk(child);
          }
        }
      } else if ((type == 3 || type == 4) && isPreformatted) {  // Text
        var text = node.nodeValue;
        var match = text.match(lineBreak);
        if (match) {
          var firstLine = text.substring(0, match.index);
          node.nodeValue = firstLine;
          var tail = text.substring(match.index + match[0].length);
          if (tail) {
            var parent = node.parentNode;
            parent.insertBefore(
              document.createTextNode(tail), node.nextSibling);
          }
          breakAfter(node);
          if (!firstLine) {
            // Don't leave blank text nodes in the DOM.
            node.parentNode.removeChild(node);
          }
        }
      }
    }

    // Split a line after the given node.
    function breakAfter(lineEndNode) {
      // If there's nothing to the right, then we can skip ending the line
      // here, and move root-wards since splitting just before an end-tag
      // would require us to create a bunch of empty copies.
      while (!lineEndNode.nextSibling) {
        lineEndNode = lineEndNode.parentNode;
        if (!lineEndNode) { return; }
      }

      function breakLeftOf(limit, copy) {
        // Clone shallowly if this node needs to be on both sides of the break.
        var rightSide = copy ? limit.cloneNode(false) : limit;
        var parent = limit.parentNode;
        if (parent) {
          // We clone the parent chain.
          // This helps us resurrect important styling elements that cross lines.
          // E.g. in <i>Foo<br>Bar</i>
          // should be rewritten to <li><i>Foo</i></li><li><i>Bar</i></li>.
          var parentClone = breakLeftOf(parent, 1);
          // Move the clone and everything to the right of the original
          // onto the cloned parent.
          var next = limit.nextSibling;
          parentClone.appendChild(rightSide);
          for (var sibling = next; sibling; sibling = next) {
            next = sibling.nextSibling;
            parentClone.appendChild(sibling);
          }
        }
        return rightSide;
      }

      var copiedListItem = breakLeftOf(lineEndNode.nextSibling, 0);

      // Walk the parent chain until we reach an unattached LI.
      for (var parent;
           // Check nodeType since IE invents document fragments.
           (parent = copiedListItem.parentNode) && parent.nodeType === 1;) {
        copiedListItem = parent;
      }
      // Put it on the list of lines for later processing.
      listItems.push(copiedListItem);
    }

    // Split lines while there are lines left to split.
    for (var i = 0;  // Number of lines that have been split so far.
         i < listItems.length;  // length updated by breakAfter calls.
         ++i) {
      walk(listItems[i]);
    }

    // Make sure numeric indices show correctly.
    if (startLineNum === (startLineNum|0)) {
      listItems[0].setAttribute('value', startLineNum);
    }

    var ol = document.createElement('ol');
    ol.className = 'linenums';
    var offset = Math.max(0, ((startLineNum - 1 /* zero index */)) | 0) || 0;
    for (var i = 0, n = listItems.length; i < n; ++i) {
      li = listItems[i];
      // Stick a class on the LIs so that stylesheets can
      // color odd/even rows, or any other row pattern that
      // is co-prime with 10.
      li.className = 'L' + ((i + offset) % 10);
      if (!li.firstChild) {
        li.appendChild(document.createTextNode('\xA0'));
      }
      ol.appendChild(li);
    }

    node.appendChild(ol);
  }
  /**
   * Breaks {@code job.sourceCode} around style boundaries in
   * {@code job.decorations} and modifies {@code job.sourceNode} in place.
   *
   * {@code job.decorations} is first compacted in place (zero-length entries
   * dropped, adjacent runs of the same style merged).  Then every text node
   * listed in {@code job.spans} is wrapped in one {@code <span>} per
   * decoration it overlaps.
   *
   * @param {JobT} job
   * @private
   */
  function recombineTagsAndDecorations(job) {
    var isIE8OrEarlier = /\bMSIE\s(\d+)/.exec(navigator.userAgent);
    isIE8OrEarlier = isIE8OrEarlier && +isIE8OrEarlier[1] <= 8;
    var newlineRe = /\n/g;

    var source = job.sourceCode;
    var sourceLength = source.length;
    // Index into source after the last code-unit recombined.
    var sourceIndex = 0;

    var spans = job.spans;
    var nSpans = spans.length;
    // Index into spans after the last span which ends at or before sourceIndex.
    var spanIndex = 0;

    var decorations = job.decorations;
    var nDecorations = decorations.length;
    // Index into decorations after the last decoration which ends at or before
    // sourceIndex.
    var decorationIndex = 0;

    // Remove all zero-length decorations.  The sentinel lets the last real
    // entry be compared against the end of the source.
    decorations[nDecorations] = sourceLength;
    var decPos, i;
    for (i = decPos = 0; i < nDecorations;) {
      if (decorations[i] !== decorations[i + 2]) {
        decorations[decPos++] = decorations[i++];
        decorations[decPos++] = decorations[i++];
      } else {
        i += 2;
      }
    }
    nDecorations = decPos;

    // Simplify decorations.
    for (i = decPos = 0; i < nDecorations;) {
      var startPos = decorations[i];
      // Conflate all adjacent decorations that use the same style.
      var startDec = decorations[i + 1];
      var end = i + 2;
      while (end + 2 <= nDecorations && decorations[end + 1] === startDec) {
        end += 2;
      }
      decorations[decPos++] = startPos;
      decorations[decPos++] = startDec;
      i = end;
    }

    nDecorations = decorations.length = decPos;

    // Hide the node while it is being rewritten; restored in the finally.
    var sourceNode = job.sourceNode;
    var oldDisplay = "";
    if (sourceNode) {
      oldDisplay = sourceNode.style.display;
      sourceNode.style.display = 'none';
    }
    try {
      var decoration = null;
      while (spanIndex < nSpans) {
        var spanStart = spans[spanIndex];
        var spanEnd = /** @type{number} */ (spans[spanIndex + 2])
            || sourceLength;

        var decEnd = decorations[decorationIndex + 2] || sourceLength;

        var end = Math.min(spanEnd, decEnd);

        var textNode = /** @type{Node} */ (spans[spanIndex + 1]);
        var styledText;
        if (textNode.nodeType !== 1  // Don't muck with <BR>s or <LI>s
            // Don't introduce spans around empty text nodes.
            && (styledText = source.substring(sourceIndex, end))) {
          // This may seem bizarre, and it is.  Emitting LF on IE causes the
          // code to display with spaces instead of line breaks.
          // Emitting Windows standard issue linebreaks (CRLF) causes a blank
          // space to appear at the beginning of every line but the first.
          // Emitting an old Mac OS 9 line separator makes everything spiffy.
          if (isIE8OrEarlier) {
            styledText = styledText.replace(newlineRe, '\r');
          }
          textNode.nodeValue = styledText;
          var document = textNode.ownerDocument;
          var span = document.createElement('span');
          span.className = decorations[decorationIndex + 1];
          var parentNode = textNode.parentNode;
          parentNode.replaceChild(span, textNode);
          span.appendChild(textNode);
          // Only split when part of this span is still undecorated;
          // otherwise an empty text node would be left behind in the DOM.
          if (end < spanEnd) {  // Split off a text node.
            spans[spanIndex + 1] = textNode
                // TODO: Possibly optimize by using '' if there's no flicker.
                = document.createTextNode(source.substring(end, spanEnd));
            parentNode.insertBefore(textNode, span.nextSibling);
          }
        }

        sourceIndex = end;

        if (sourceIndex >= spanEnd) {
          spanIndex += 2;
        }
        if (sourceIndex >= decEnd) {
          decorationIndex += 2;
        }
      }
    } finally {
      if (sourceNode) {
        sourceNode.style.display = oldDisplay;
      }
    }
  }
  /** Maps language-specific file extensions to handlers. */
  var langHandlerRegistry = {};
  /**
   * Register a language handler for the given file extensions.
   *
   * An extension that already has a handler keeps it; the attempt to
   * override is reported with a console warning when a console is available.
   *
   * @param {function (JobT)} handler a function from source code to a list
   *      of decorations.  Takes a single argument job which describes the
   *      state of the computation and attaches the decorations to it.
   * @param {Array.<string>} fileExtensions
   */
  function registerLangHandler(handler, fileExtensions) {
    // Borrow the method from Object.prototype so that an extension which is
    // itself called "hasOwnProperty" cannot shadow the lookup.
    var hasOwn = Object.prototype.hasOwnProperty;
    for (var i = fileExtensions.length; --i >= 0;) {
      var ext = fileExtensions[i];
      if (!hasOwn.call(langHandlerRegistry, ext)) {
        langHandlerRegistry[ext] = handler;
      } else if (win['console']) {
        console['warn']('cannot override language handler %s', ext);
      }
    }
  }
  /**
   * Looks up the handler registered for a language extension.
   *
   * @param {string|null|undefined} extension the requested language name.
   * @param {string} source the plain text to be decorated; only consulted
   *     when no handler is registered for {@code extension}.
   * @return {function (JobT)} the registered handler, or the
   *     'default-markup' / 'default-code' handler as a fallback.
   */
  function langHandlerForExtension(extension, source) {
    // Borrow the method from Object.prototype so that a registry entry named
    // "hasOwnProperty" cannot shadow the lookup.
    var isRegistered = !!extension
        && Object.prototype.hasOwnProperty.call(langHandlerRegistry, extension);
    if (!isRegistered) {
      // Treat it as markup if the first non-whitespace character is a <.
      extension = /^\s*</.test(source)
          ? 'default-markup'
          : 'default-code';
    }
    return langHandlerRegistry[extension];
  }
  // The general purpose source decorator is the handler for 'default-code'.
  registerLangHandler(decorateSource, ['default-code']);

  // Token patterns for markup.  Entries whose style starts with "lang-"
  // hand their first capture group to another language handler.
  var markupFallthroughPatterns = [];
  markupFallthroughPatterns.push(
      [PR_PLAIN,       /^[^<?]+/],
      [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/],
      [PR_COMMENT,     /^<\!--[\s\S]*?(?:-\->|$)/]);
  // Unescaped content in an unknown language
  markupFallthroughPatterns.push(
      ['lang-',        /^<\?([\s\S]+?)(?:\?>|$)/],
      ['lang-',        /^<%([\s\S]+?)(?:%>|$)/],
      [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/],
      ['lang-',        /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i]);
  // Unescaped content in javascript.  (Or possibly vbscript).
  markupFallthroughPatterns.push(
      ['lang-js',      /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i]);
  // Contains unescaped stylesheet content
  markupFallthroughPatterns.push(
      ['lang-css',     /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i],
      ['lang-in.tag',  /^(<\/?[a-z][^<>]*>)/i]);

  registerLangHandler(
      createSimpleLexer([], markupFallthroughPatterns),
      ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);
  // Lexer for the inside of a tag (the 'in.tag' language): whitespace and
  // quoted attribute values are recognised by their first character.
  var inTagShortcutPatterns = [
    [PR_PLAIN,        /^[\s]+/, null, ' \t\r\n'],
    [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'']
  ];
  // Everything else in a tag: names, punctuation, and the values of
  // on* and style attributes, which are handed to the js and css handlers.
  var inTagFallthroughPatterns = [
    [PR_TAG,          /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],
    [PR_ATTRIB_NAME,  /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
    ['lang-uq.val',   /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
    [PR_PUNCTUATION,  /^[=<>\/]+/],
    ['lang-js',       /^on\w+\s*=\s*\"([^\"]+)\"/i],
    ['lang-js',       /^on\w+\s*=\s*\'([^\']+)\'/i],
    ['lang-js',       /^on\w+\s*=\s*([^\"\'>\s]+)/i],
    ['lang-css',      /^style\s*=\s*\"([^\"]+)\"/i],
    ['lang-css',      /^style\s*=\s*\'([^\']+)\'/i],
    ['lang-css',      /^style\s*=\s*([^\"\'>\s]+)/i]
  ];
  var inTagLexer =
      createSimpleLexer(inTagShortcutPatterns, inTagFallthroughPatterns);
  registerLangHandler(inTagLexer, ['in.tag']);

  // An unquoted attribute value is styled as an attribute value in full.
  var unquotedValueLexer =
      createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]);
  registerLangHandler(unquotedValueLexer, ['uq.val']);
  // One row per language family: the extensions it is registered under and
  // the options handed to sourceDecorator.  Rows are registered in order.
  var sourceLanguageTable = [
    [['c', 'cc', 'cpp', 'cxx', 'cyc', 'm'], {
      'keywords': CPP_KEYWORDS,
      'hashComments': true,
      'cStyleComments': true,
      'types': C_TYPES
    }],
    [['json'], {
      'keywords': 'null,true,false'
    }],
    [['cs'], {
      'keywords': CSHARP_KEYWORDS,
      'hashComments': true,
      'cStyleComments': true,
      'verbatimStrings': true,
      'types': C_TYPES
    }],
    [['java'], {
      'keywords': JAVA_KEYWORDS,
      'cStyleComments': true
    }],
    [['bash', 'bsh', 'csh', 'sh'], {
      'keywords': SH_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true
    }],
    [['cv', 'py', 'python'], {
      'keywords': PYTHON_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true,
      'tripleQuotedStrings': true
    }],
    [['perl', 'pl', 'pm'], {
      'keywords': PERL_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true,
      'regexLiterals': 2  // multiline regex literals
    }],
    [['rb', 'ruby'], {
      'keywords': RUBY_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true,
      'regexLiterals': true
    }],
    [['javascript', 'js', 'ts', 'typescript'], {
      'keywords': JSCRIPT_KEYWORDS,
      'cStyleComments': true,
      'regexLiterals': true
    }]
  ];
  for (var langRow = 0; langRow < sourceLanguageTable.length; ++langRow) {
    registerLangHandler(
        sourceDecorator(sourceLanguageTable[langRow][1]),
        sourceLanguageTable[langRow][0]);
  }
  // CoffeeScript.  The option key is spelled 'multiLineStrings', which is the
  // name sourceDecorator reads; a differently-cased key would be ignored.
  registerLangHandler(sourceDecorator({
          'keywords': COFFEE_KEYWORDS,
          'hashComments': 3,  // ### style block comments
          'cStyleComments': true,
          'multiLineStrings': true,
          'tripleQuotedStrings': true,
          'regexLiterals': true
        }), ['coffee']);
  // The body of a regex literal is styled as a string in its entirety.
  registerLangHandler(
      createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
  /**
   * Runs one prettify job: extracts the plain text and spans from
   * {@code job.sourceNode}, lets the matching language handler decorate the
   * job, then folds the decorations back into the node.  Anything thrown
   * along the way is logged to the console, when there is one, and is not
   * propagated.
   *
   * @param {JobT} job
   */
  function applyDecorator(job) {
    var requestedLang = job.langExtension;

    try {
      // Pull the tags out, leaving plain text plus a span index.
      var extracted = extractSourceSpans(job.sourceNode, job.pre);
      /** Plain text. @type {string} */
      var plainText = extracted.sourceCode;
      job.sourceCode = plainText;
      job.spans = extracted.spans;
      job.basePos = 0;

      // Pick the language handler and let it attach decorations to the job.
      var decorateJob = langHandlerForExtension(requestedLang, plainText);
      decorateJob(job);

      // Rewrite sourceNode in place from the tags and the decorations.
      recombineTagsAndDecorations(job);
    } catch (err) {
      if (win['console']) {
        console['log'](err && err['stack'] || err);
      }
    }
  }
  /**
   * Pretty print a chunk of code.
   *
   * The HTML is parsed by assigning it to innerHTML, so it could cause
   * images to load and onload listeners to fire, e.g.
   * <img onerror="alert(1337)" src="nosuchimage.png">.
   * We assume that the inner HTML is from a trusted source.
   *
   * @param sourceCodeHtml {string} The HTML to pretty print.
   * @param opt_langExtension {string} The language name to use.
   *     Typically, a filename extension like 'cpp' or 'java'.
   * @param opt_numberLines {number|boolean} True to number lines,
   *     or the 1-indexed number of the first line in sourceCodeHtml.
   * @return {string} the inner HTML of the prettified <pre> wrapper.
   */
  function $prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {
    /** @type{number|boolean} */
    var lineNumbering = opt_numberLines ? opt_numberLines : false;
    /** @type{string|null} */
    var requestedLang = opt_langExtension ? opt_langExtension : null;

    // The pre-tag is required for IE8 which strips newlines from innerHTML
    // when it is injected into a <pre> tag.
    // http://stackoverflow.com/questions/451486/pre-tag-loses-line-breaks-when-setting-innerhtml-in-ie
    // http://stackoverflow.com/questions/195363/inserting-a-newline-into-a-pre-tag-ie-javascript
    var scratch = document.createElement('div');
    scratch.innerHTML = '<pre>' + sourceCodeHtml + '</pre>';
    /** @type{!Element} */
    var preElement = /** @type{!Element} */(scratch.firstChild);

    if (lineNumbering) {
      numberLines(preElement, lineNumbering, true);
    }

    /** @type{JobT} */
    var job = {
      langExtension: requestedLang,
      numberLines: lineNumbering,
      sourceNode: preElement,
      pre: 1,
      sourceCode: null,
      basePos: null,
      spans: null,
      decorations: null
    };
    applyDecorator(job);

    return preElement.innerHTML;
  }
  /**
   * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
   * {@code class=prettyprint} and prettify them.  {@code <xmp>} tags are
   * collected as well, and an element directly preceded by a
   * {@code <?prettify ...?>} directive is processed even without the class.
   *
   * @param {Function} opt_whenDone called when prettifying is done.
   * @param {HTMLElement|HTMLDocument} opt_root an element or document
   *   containing all the elements to pretty print.
   *   Defaults to {@code document.body}.
   */
  function $prettyPrint(opt_whenDone, opt_root) {
    var root = opt_root || document.body;
    var doc = root.ownerDocument || document;

    // Fetch the candidates and copy them into a plain array.
    var candidateTags = ['pre', 'code', 'xmp'];
    var matchesByTag = [];
    for (var t = 0; t < candidateTags.length; ++t) {
      matchesByTag.push(root.getElementsByTagName(candidateTags[t]));
    }
    var elements = [];
    for (var g = 0; g < matchesByTag.length; ++g) {
      var group = matchesByTag[g];
      for (var m = 0, groupSize = group.length; m < groupSize; ++m) {
        elements.push(group[m]);
      }
    }
    matchesByTag = null;

    // Fall back to a Date based clock where Date.now is missing.
    var clock = Date['now']
        ? Date
        : { 'now': function () { return +(new Date); } };

    // Index of the next element to process; shared between continuations.
    var k = 0;

    var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/;
    var prettyPrintRe = /\bprettyprint\b/;
    var prettyPrintedRe = /\bprettyprinted\b/;
    var preformattedTagNameRe = /pre|xmp/i;
    var codeRe = /^code$/i;
    var preCodeXmpRe = /^(?:pre|code|xmp)$/i;
    // Sentinel meaning "no prettify directive precedes the element".
    var EMPTY = {};

    // Looks for a preceding comment like
    // <?prettify lang="..." linenums="..."?>
    // and returns its name=value pairs, or EMPTY when there is none.
    function precedingDirective(element) {
      for (var sib = element.previousSibling; sib; sib = sib.previousSibling) {
        var kind = sib.nodeType;
        // <?foo?> is parsed by HTML 5 to a comment node (8)
        // like <!--?foo?-->, but in XML is a processing instruction
        var text = (kind === 7 || kind === 8) && sib.nodeValue;
        if (text) {
          if (!/^\??prettify\b/.test(text)) { return EMPTY; }
          var parsed = {};
          text.replace(
              /\b(\w+)=([\w:.%+-]+)/g,
              function (_, name, value) { parsed[name] = value; });
          return parsed;
        }
        // Skip over white-space text nodes but not others.
        if (kind !== 3 || /\S/.test(sib.nodeValue)) { return EMPTY; }
      }
      return EMPTY;
    }

    // True when some ancestor is itself a prettyprint pre/code/xmp element.
    function hasPrettyPrintAncestor(element) {
      for (var anc = element.parentNode; anc; anc = anc.parentNode) {
        if (preCodeXmpRe.test(anc.tagName)
            && anc.className && prettyPrintRe.test(anc.className)) {
          return true;
        }
      }
      return false;
    }

    // If the classes includes a language extensions, use it.
    // Language extensions can be specified like
    //     <pre class="prettyprint lang-cpp">
    // the language extension "cpp" is used to find a language handler
    // as passed to PR.registerLangHandler.
    // HTML5 recommends that a language be specified using "language-"
    // as the prefix instead.  Google Code Prettify supports both.
    // http://dev.w3.org/html5/spec-author-view/the-code-element.html
    function resolveLanguage(element, attrs, className) {
      var fromDirective = attrs['lang'];
      if (fromDirective) { return fromDirective; }
      var found = className.match(langExtensionRe);
      // Support <pre class="prettyprint"><code class="language-c">
      var wrapper;
      if (!found && (wrapper = childContentWrapper(element))
          && codeRe.test(wrapper.tagName)) {
        found = wrapper.className.match(langExtensionRe);
      }
      return found ? found[1] : found;
    }

    // Decides whether white-space in the element is significant, from the
    // tag name or else from its white-space style.
    function resolvePreformatted(element) {
      if (preformattedTagNameRe.test(element.tagName)) { return 1; }
      var currentStyle = element['currentStyle'];
      var defaultView = doc.defaultView;
      var whitespace;
      if (currentStyle) {
        whitespace = currentStyle['whiteSpace'];
      } else if (defaultView && defaultView.getComputedStyle) {
        whitespace = defaultView.getComputedStyle(element, null)
            .getPropertyValue('white-space');
      } else {
        whitespace = 0;
      }
      return whitespace && 'pre' === whitespace.substring(0, 3);
    }

    // Look for a class like linenums or linenums:<n> where <n> is the
    // 1-indexed number of the first line.
    function resolveLineNums(attrs, className) {
      var requested = attrs['linenums'];
      requested = requested === 'true' || +requested;
      if (requested) { return requested; }
      var found = className.match(/\blinenums\b(?::(\d+))?/);
      if (!found) { return false; }
      return found[1] && found[1].length ? +found[1] : true;
    }

    // Prettifies one candidate, unless it is not marked, already done, or
    // nested inside another prettyprint element.
    function prettifyElement(cs) {
      var attrs = precedingDirective(cs);
      var className = cs.className;
      if (attrs === EMPTY && !prettyPrintRe.test(className)) { return; }
      // Don't redo this if we've already done it.
      // This allows recalling pretty print to just prettyprint elements
      // that have been added to the page since last call.
      if (prettyPrintedRe.test(className)) { return; }
      // make sure this is not nested in an already prettified element
      if (hasPrettyPrintAncestor(cs)) { return; }

      // Mark done.  If we fail to prettyprint for whatever reason,
      // we shouldn't try again.
      cs.className += ' prettyprinted';

      var langExtension = resolveLanguage(cs, attrs, className);
      var preformatted = resolvePreformatted(cs);
      var lineNums = resolveLineNums(attrs, className);
      if (lineNums) { numberLines(cs, lineNums, preformatted); }

      // do the pretty printing
      applyDecorator({
        langExtension: langExtension,
        sourceNode: cs,
        numberLines: lineNums,
        pre: preformatted,
        sourceCode: null,
        basePos: null,
        spans: null,
        decorations: null
      });
    }

    // The loop is broken into a series of continuations to make sure that we
    // don't make the browser unresponsive when rewriting a large page.
    function doWork() {
      var endTime = Infinity;
      if (win['PR_SHOULD_USE_CONTINUATION']) {
        endTime = clock['now']() + 250 /* ms */;
      }
      while (k < elements.length && clock['now']() < endTime) {
        prettifyElement(elements[k]);
        k++;
      }
      if (k < elements.length) {
        // finish up in a continuation
        win.setTimeout(doWork, 250);
      } else if ('function' === typeof opt_whenDone) {
        opt_whenDone();
      }
    }

    doWork();
  }
  // Publish the two entry points: on the window object when running in the
  // global scope, otherwise through the enclosing prettyPrintOne variable.
  // The prettyPrint variable is assigned in both cases.
  if (IN_GLOBAL_SCOPE) {
    win['prettyPrintOne'] = $prettyPrintOne;
    win['prettyPrint'] = $prettyPrint;
  } else {
    prettyPrintOne = $prettyPrintOne;
  }
  prettyPrint = $prettyPrint;

  /**
   * Contains functions for creating and registering new language handlers,
   * the style name constants, and the two pretty printing entry points.
   * @type {Object}
   */
  var PR = win['PR'] = {
        'createSimpleLexer': createSimpleLexer,
        'registerLangHandler': registerLangHandler,
        'sourceDecorator': sourceDecorator,
        'PR_ATTRIB_NAME': PR_ATTRIB_NAME,
        'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,
        'PR_COMMENT': PR_COMMENT,
        'PR_DECLARATION': PR_DECLARATION,
        'PR_KEYWORD': PR_KEYWORD,
        'PR_LITERAL': PR_LITERAL,
        'PR_NOCODE': PR_NOCODE,
        'PR_PLAIN': PR_PLAIN,
        'PR_PUNCTUATION': PR_PUNCTUATION,
        'PR_SOURCE': PR_SOURCE,
        'PR_STRING': PR_STRING,
        'PR_TAG': PR_TAG,
        'PR_TYPE': PR_TYPE,
        'prettyPrintOne': $prettyPrintOne,
        'prettyPrint': $prettyPrint
      };
  // Make PR available via the Asynchronous Module Definition (AMD) API.
  // Per https://github.com/amdjs/amdjs-api/wiki/AMD:
  // The Asynchronous Module Definition (AMD) API specifies a
  // mechanism for defining modules such that the module and its
  // dependencies can be asynchronously loaded.
  // ...
  // To allow a clear indicator that a global define function (as
  // needed for script src browser loading) conforms to the AMD API,
  // any global define function SHOULD have a property called "amd"
  // whose value is an object. This helps avoid conflict with any
  // other existing JavaScript code that could have defined a define()
  // function that does not conform to the AMD API.
  var define = win['define'];
  // Only a function that carries a truthy "amd" property is treated as a
  // conforming loader.
  var amdLoaderPresent = "function" === typeof define && define['amd'];
  if (amdLoaderPresent) {
    define("google-code-prettify", [], function () {
      return PR;
    });
  }
  1622. })();