Universal code syntax highlighter

Un syntax highlighter minimal (encore plus léger et themable que microlight).

/**
 * Minimal regex-based syntax highlighter.
 *
 * Applies an ordered list of [pattern, replacement] rules to the source text;
 * every rule runs on the output of the previous one, so the order matters.
 *
 * @param {string} code - Source text to decorate.
 * @returns {string} The text with tokens wrapped in `c-*` classed elements.
 */
const hightlight = (code) => {
  const rules = [
    // Language keywords (the character following the keyword is captured and re-emitted)
    [/\b(var|function|typeof|new|return|if|for|in|while|break|do|continue|switch|case|try|catch)([^a-z0-9\$_])/g,
      '<span class="c-keyword">$1</span>$2'],
    // Built-in types and literal constants
    [/\b(RegExp|Boolean|Number|String|Array|Object|Function|this|true|false|NaN|undefined|null|Infinity)([^a-z0-9\$_])/g,
      '<span class="c-type">$1</span>$2'],
    // Block comments and line comments
    [/(\/\*[^]*?\*\/|(\/\/)[^\n\r]+)/gim, '<span class="c-comment">$1</span>'],
    // Single-quoted strings
    [/('.*?')/g, '<span class="c-string">$1</span>'],
    // Function and variable names, turned into self-referencing anchors
    [/([a-z\_\$][a-z0-9_]*)(\s?([\(\)\[\];]|[=+\-\*,<]\s)|\s>)/gi,
      '<a id="var-$1" href="#var-$1" class="c-variable">$1</a>$2'],
    // Braces, brackets and pipes
    [/(\{|\}|\]|\[|\|)/gi, '<span class="c-punctuation">$1</span>'],
    // Numbers (hexadecimal, decimal, exponent notation)
    [/(0x[0-9a-f]*|\b(\d*\.)?([\d]+(e-?[0-9]*)?)\b)/gi, '<span class="c-atom">$1</span>'],
    // Tab expansion (2 spaces) is left disabled: .replace(/\t/g,'  ')
  ];

  return rules.reduce(
    (markup, [pattern, replacement]) => markup.replaceAll(pattern, replacement),
    code,
  );
};

// Replace the content of every <code> element with its highlighted markup.
for (const block of document.querySelectorAll('code')) {
    block.innerHTML = hightlight(block.innerText);
}
/* Default (light) theme for the token classes emitted by the highlighter. */
code .c-type {
    font-weight: 700;
}

code .c-variable,
.c-type {
    color: #228;
}

code .c-keyword {
    color: #708;
}

code .c-string {
    color: #a22;
}

code .c-punctuation {
    color: #666;
}

code .c-atom {
    color: #281;
}

/* Everything nested inside a comment takes the comment colour. */
code .c-comment,
.c-comment * {
    color: #A70 !important;
}

/* Element whose id matches the URL fragment (the variable anchors link to themselves). */
code *:target {
    background-color: #ff6;
}

Mais après quelques minutes d'utilisation, on voit qu'il souffre de quelques problèmes. On peut donc l'améliorer rapidement ainsi :

/**
 * Minimal regex-based syntax highlighter: wraps the tokens of a source string
 * in HTML elements carrying `c-*` classes so they can be themed with CSS.
 *
 * The replacements run in sequence, each one on the output of the previous
 * one, so their order matters (strings are wrapped first, then keywords,
 * types, comments, names, punctuation and numbers).
 *
 * @param {string} code - Raw source text to decorate.
 * @returns {string} The decorated markup.
 */
const hightlight = (code) => code
    // Wrap every "<" in its own <span>. Mandatory according to the original
    // author: otherwise innerHTML turns the markup into <!-- --> comments.
    .replaceAll(/(<)/g, '<span>$1</span>')
    // Strings (single- or double-quoted, not spanning a line break)
    .replaceAll(/('.*?'|".*?")/g, '<span class="c-string">$1</span>')
    // Keywords. The (?![\w$]) lookahead rejects any identifier character after
    // the keyword (including uppercase, so `forEach` / `newValue` are left
    // alone) and, being zero-width, also lets a keyword match at end of input.
    .replaceAll(/\b(var|const|let|function|typeof|new|return|if|for|foreach|in|while|break|do|continue|switch|case|try|catch)(?![\w$])/g,
        '<span class="c-operator">$1</span>')
    // Types and literal constants (same trailing boundary as the keywords)
    .replaceAll(/\b(RegExp|Boolean|Number|String|Array|Object|Function|this|true|false|NaN|undefined|null|Infinity)(?![\w$])/g,
        '<span class="c-type">$1</span>')
    // Comments: block comments, line comments not preceded by ":" (so URIs are
    // skipped), and markup comments whose "<" was wrapped by the first step
    .replaceAll(/(\/\*[^]*?\*\/|(?<!\:)(\/\/)[^\n\r]+|<span><<\/span>\!\-\-[^]*?\-\->)/gim, '<span class="c-comment">$1</span>')
    // Variable and function names, turned into self-referencing anchors
    .replaceAll(/([a-z\_\$][a-z0-9_]*)(\s?([\(\)\[\];]|[=+\-\*,<]\s)|\s>)/gi, '<a id="var-$1" href="#var-$1" class="c-variable">$1</a>$2')
    // Braces, brackets and pipes
    .replaceAll(/(\{|\}|\]|\[|\|)/gi, '<span class="c-punctuation">$1</span>')
    // Numbers (hexadecimal, decimal, exponent notation)
    .replaceAll(/(0x[0-9a-f]*|\b(\d*\.)?([\d]+(e-?[0-9]*)?)\b)/gi, '<span class="c-atom">$1</span>');
    // Tab expansion (2 spaces) is left disabled: .replace(/\t/g,'  ')

// Replace the content of every <code> directly inside a <pre> with its highlighted markup.
for (const block of document.querySelectorAll('pre > code')) {
    block.innerHTML = hightlight(block.textContent);
}
/* Default (light) theme for the token classes emitted by the highlighter. */
code .c-type {font-weight:700}
code .c-variable, .c-type {color: #228}
code .c-operator {color: #708}
code .c-string {color:#a22}
code .c-punctuation {color:#666}
code .c-atom {color:#281}
/* Everything nested inside a comment takes the comment colour. */
code .c-comment, .c-comment * {color: #A70!important}
/* Element whose id matches the URL fragment (the variable anchors link to themselves). */
code *:target{background-color:#ff6}
/* Tomorrow Night theme, applied when the user agent reports a dark colour scheme */
@media screen and (prefers-color-scheme: dark) {
    /* font-weight, not font-style: 700 is a weight value and is invalid for font-style */
    code .c-type {color:#DB9455;font-weight:700}
    code .c-operator {color: #B194B4}
    code .c-variable {color: #83A1C1}
    code .c-string {color:#D7C467}
    code .c-atom {color: #B1BE59}
    code .c-punctuation {color:inherit}
    code .c-comment, .c-comment * {color: #999!important;opacity:.5}
}

Modifications :

  • thème sombre automatique
  • support des langages à markup et PHP
  • ajout de const et let pour Javascript, foreach pour PHP
  • les commentaires ne se font plus dans les strings
  • les commentaires ne se font plus sur les URI (//)
  • appel via textContent et non le plus lent et moins fiable innerText (voir http://www.kellegous.com/j/2013/02/27/innertext-vs-textcontent/ mais pour résumer : innerText n'était pas standard, il ne s'applique qu'à des HTMLElement et non des Node, il ne retourne que le contenu visible, change les espaces et demande des infos de layout qui font qu'il est de 12 à 300 fois moins rapide)