tmpvar.com wrench logo

POC / WASM peglib

parser.cpp

// compile with:
// emcc -O2 --std=c++20 parser.cpp -s EXPORT_ES6=1 -lembind -o build/parser.js
#include <emscripten/bind.h>
#include <format>
#include "peglib.h"

using namespace emscripten;

// Builds the calculator parser: compiles the PEG grammar, attaches the
// semantic actions that evaluate an expression to an int, and enables packrat
// parsing. If the grammar itself fails to compile, the parser is returned
// without actions; callers must test it for validity before using it.
static peg::parser MakeCalculatorParser() {
  peg::parser parser(R"(
        # Grammar for Calculator...
        Additive    <- Multiplicative '+' Additive / Multiplicative
        Multiplicative   <- Primary '*' Multiplicative^cond / Primary
        Primary     <- '(' Additive ')' / Number
        Number      <- < [0-9]+ >
        %whitespace <- [ \t]*
        cond <- '' { error_message "missing multiplicative" }
    )");

  // Rules cannot be looked up on a parser whose grammar did not load.
  if (!parser) {
    return parser;
  }

  parser["Additive"] = [](const peg::SemanticValues &vs) {
    switch (vs.choice()) {
    case 0: // "Multiplicative '+' Additive"
      return std::any_cast<int>(vs[0]) + std::any_cast<int>(vs[1]);
    default: // "Multiplicative"
      return std::any_cast<int>(vs[0]);
    }
  };

  parser["Multiplicative"] = [](const peg::SemanticValues &vs) {
    switch (vs.choice()) {
    case 0: // "Primary '*' Multiplicative"
      return std::any_cast<int>(vs[0]) * std::any_cast<int>(vs[1]);
    default: // "Primary"
      return std::any_cast<int>(vs[0]);
    }
  };

  parser["Number"] = [](const peg::SemanticValues &vs) {
    return vs.token_to_number<int>();
  };

  parser.enable_packrat_parsing();
  return parser;
}

// Evaluates `source` as an integer expression made of numbers, '+', '*' and
// parentheses, and returns the outcome as text.
//
// Returns:
//   - the decimal value when `source` parses,
//   - "failed to parse grammar" when the built-in grammar could not be loaded,
//   - "failed to parse: '<source>'" otherwise, followed by the first parser
//     diagnostic as " (<line>:<col>: <message>)" when one was reported.
std::string Parse(std::string source) {
  // Built once and reused: this function is called for every edit of the
  // input field, and recompiling the grammar each time is wasted work.
  static peg::parser parser = MakeCalculatorParser();

  if (!parser) {
    return std::string("failed to parse grammar");
  }

  // Keep only the first diagnostic; it is the one closest to the real cause.
  std::string detail;
  parser.set_logger(
      [&detail](std::size_t line, std::size_t col, const std::string &msg) {
        if (detail.empty()) {
          detail = std::format("{}:{}: {}", line, col, msg);
        }
      });

  int val = 0;
  const bool ok = parser.parse(source, val);

  // The parser outlives this call, so replace the logger that refers to the
  // local `detail` before returning.
  parser.set_logger([](std::size_t, std::size_t, const std::string &) {});

  if (ok) {
    return std::format("{}", val);
  }
  if (detail.empty()) {
    return std::format("failed to parse: '{}'", source);
  }
  return std::format("failed to parse: '{}' ({})", source, detail);
}

// embind registration: makes the C++ Parse function callable from JavaScript
// under the name "Parse" on the instantiated module object.
EMSCRIPTEN_BINDINGS(peglib) {
  emscripten::function("Parse", &Parse);
}

demo.js

import Module from './build/parser.js'

// Instantiate the WASM module, then keep #output in sync with the result of
// parsing whatever text is currently in #string-to-parse.
(async () => {
  const m = await Module();

  const outputEl = document.getElementById('output')
  const inputEl = document.getElementById('string-to-parse')

  // Parse the field's live text. `.value` is always a string, whereas
  // getAttribute('value') only reflects the markup and is null when the
  // attribute is absent.
  const render = () => {
    outputEl.innerText = m.Parse(inputEl.value)
  }

  // 'input' fires for every change to the text (typing, paste, cut, drop,
  // IME), not only for changes that end with a key release.
  inputEl.addEventListener('input', render)

  // Show the result for the initial contents.
  render()
})();



Notes

I originally wanted to do this using clang only, but ran into some issues related to the default NixOS clang-wrapper. To get something working, I tried emcc and later found embind, which removes much of the work of binding C++ functions to JavaScript.

Refs