Rust の Servo html5ever ライブラリで HTML をパース

Rust の html5ever ライブラリで HTML をパースする。Servo で使われているものと思われる。メモ。

https://github.com/servo/html5ever

High-performance browser-grade HTML5 parser

Cargo.toml

[package]
name = "htmlpaser-test"
version = "0.1.0"
edition = "2024"

[dependencies]
html5ever = "0.27.0"
markup5ever_rcdom = "0.3.0"

src/main.rs

use html5ever::driver::parse_document;
use html5ever::tendril::TendrilSink;
use markup5ever_rcdom::{RcDom, Handle};
use std::default::Default;

/// Parses an HTML document string into an `RcDom` tree.
///
/// The whole text is handed to html5ever's document parser in one piece,
/// using the default parse options.
fn parse_html(html: &str) -> RcDom {
    let sink = RcDom::default();
    let parser = parse_document(sink, Default::default());
    parser.one(html)
}

/// Prints the node behind `handle`, then recurses into all of its children.
///
/// Every level of `depth` adds two spaces of indentation. Document, element
/// (followed by its attributes) and text nodes each produce output; any other
/// node kind prints nothing itself, but its children are still visited.
fn print_dom(handle: &Handle, depth: usize) {
    let pad = "  ".repeat(depth);

    match &handle.data {
        markup5ever_rcdom::NodeData::Document => println!("{}Document", pad),
        markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
            println!("{}Element: {}", pad, name.local);
            attrs.borrow().iter().for_each(|attribute| {
                println!("{}  Attribute: {}=\"{}\"", pad, attribute.name.local, attribute.value);
            });
        }
        markup5ever_rcdom::NodeData::Text { contents } => {
            println!("{}Text: {}", pad, contents.borrow());
        }
        _ => {}
    }

    // Children are printed one level deeper than the current node.
    handle
        .children
        .borrow()
        .iter()
        .for_each(|child| print_dom(child, depth + 1));
}

/// Parses a small sample document and prints its DOM tree to stdout.
fn main() {
    // Sample input; the `th:loop` attribute shows how a prefixed attribute name is reported.
    const SAMPLE_HTML: &str = "<!DOCTYPE html><html><head><title>Test</title></head><body><h1 th:loop='test'>Hello, world!</h1></body></html>";

    let dom = parse_html(SAMPLE_HTML);

    // Walk the tree starting at the document root, with no indentation.
    print_dom(&dom.document, 0);
}

結果:

$ cargo run
   Compiling htmlpaser-test v0.1.0 (/Users/hk2a/devel/rust/htmlpaser-test)
    Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.98s
     Running `target/debug/htmlpaser-test`
Document
  Element: html
    Element: head
      Element: title
        Text: Test
    Element: body
      Element: h1
        Attribute: th:loop="test"
        Text: Hello, world!

Jupyter Notebook の Rust カーネルでグラフや表形式を出力

Jupyter Notebook の Rust カーネルでグラフや表形式を出力するのに若干手間取ったのでメモです。VS Code 上で実行しています。Rust のセルで rust-analyzer が使えないのが若干不便。

導入

Jupyter の Rust カーネルを導入。

$ cargo install evcxr_jupyter
$ evcxr_jupyter --install
Writing /home/hiromasa/.local/share/jupyter/kernels/rust/kernel.json
Writing /home/hiromasa/.local/share/jupyter/kernels/rust/logo-32x32.png
Writing /home/hiromasa/.local/share/jupyter/kernels/rust/logo-64x64.png
Writing /home/hiromasa/.local/share/jupyter/kernels/rust/logo-LICENSE.md
Writing /home/hiromasa/.local/share/jupyter/kernels/rust/kernel.js
Writing /home/hiromasa/.local/share/jupyter/kernels/rust/lint.js
Writing /home/hiromasa/.local/share/jupyter/kernels/rust/lint.css
Writing /home/hiromasa/.local/share/jupyter/kernels/rust/lint-LICENSE
Writing /home/hiromasa/.local/share/jupyter/kernels/rust/version.txt
Installation complete

依存関係の導入

Jupyter Notebook の Rust セルで以下のマジックコマンドを実行して描画系の依存ライブラリを導入。

:dep plotters = { version = "0.3.1", default_features = false, features = ["evcxr", "line_series", "point_series"] }
:dep statrs = "0.15.0"
:dep prettytable = { git = "https://github.com/phsym/prettytable-rs", package = "prettytable-rs", features = ["evcxr"] }

グラフを描画する例

plotters を使ってグラフを描画する例。evcxr_figure を使って Jupyter に返却すると画像がでる。

// The plotters prelude provides `evcxr_figure`, `ChartBuilder`, `LineSeries`,
// `Circle`, the colour constants and the `into_font` conversion used below.
use plotters::prelude::*;
use statrs::distribution::{Binomial, Discrete};

// Binomial distribution parameters: `n` trials with success probability `p`.
let n = 100u64;
let p = 0.01;
let bin = Binomial::new(p, n).unwrap();
// One (k, P(X = k)) point for every k in 0..=n, narrowed to f32 for plotting.
let data: Vec<(f32, f32)> = (0..=n).map(|x| (x as f32, bin.pmf(x) as f32)).collect();
// Upper bound of the y axis: the largest probability plus a little headroom.
let max_y = data.iter().map(|&(_, y)| y).fold(0f32, f32::max) + 0.01f32;
let figure = evcxr_figure((800, 400), |root| {
    root.fill(&WHITE)?;
    // Only x in 0..10 is shown; points outside that range fall outside the plot area.
    let mut chart = ChartBuilder::on(&root)
        .caption("Binomial Distribution (n=100, p=0.01)", ("Arial", 20).into_font())
        .margin(5)
        .x_label_area_size(30)
        .y_label_area_size(40)
        .build_cartesian_2d(0f32..10f32, 0f32..max_y)?;
    chart.configure_mesh()
        .x_desc("当選回数")
        .y_desc("確率")
        .x_label_formatter(&|v| format!("{:.0}", v))
        .draw()?;
    // Draw the data as a line series with a filled circle on every point.
    chart.draw_series(LineSeries::new(data.iter().copied(), &RED))?;
    chart.draw_series(
        data.iter()
            .map(|&(x, y)| Circle::new((x, y), 4, RED.filled())),
    )?;
    Ok(())
});
// Leaving the figure as the last expression makes Jupyter render it.
figure

表形式を出力する例

prettytable の Jupyter サポートを使って出力。

use prettytable::{Table, Row, Cell};
use prettytable::{format, Attr, color};
use prettytable::evcxr::EvcxrDisplay;

// Build the table row by row: a styled header first, then the data rows.
let mut table = Table::new();
for cells in [
    vec![
        Cell::new("Name").with_style(Attr::Bold),
        Cell::new("Age").with_style(Attr::ForegroundColor(color::GREEN)),
    ],
    vec![Cell::new("Alice"), Cell::new("30")],
    vec![Cell::new("Bob"), Cell::new("25")],
] {
    table.add_row(Row::new(cells));
}
// Leaving the table as the last expression lets the notebook display it.
table

表題と関係ないですが Python の例メモ

依存関係導入。

%pip install matplotlib
%pip install pandas
%pip install scipy

グラフと表形式出力サンプル:

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import binom
import pandas as pd

# Parameters
n = 100   # number of trials
p = 0.01  # probability of success

# Probability mass function of the binomial distribution for k = 0..n
x = np.arange(n + 1)
pmf = binom.pmf(x, n, p)

# Bar chart of the first 21 outcomes (k = 0..20)
shown = slice(0, 21)
plt.bar(x[shown], pmf[shown], color='blue', alpha=0.7)
plt.title('Binomial Distribution (n=100, p=0.01)')
plt.xlabel('Number of Successes')
plt.ylabel('Probability')
plt.grid(True)
plt.show()

# Tabular output (only rows whose rounded percentage is above 0)
percent = np.round(pmf * 100, 2)
df = pd.DataFrame({'Number of Successes': x, 'Probability (%)': percent})
df_nonzero = df.loc[df['Probability (%)'] > 0]
print(df_nonzero)

Rust の Cranelift で逆ポーランド記法を JIT コンパイル

Cranelift のコンパイラ基盤を使ったハローワールドメモ。

Cranelift is a low-level retargetable code generator. It translates a target-independent intermediate representation into executable machine code.

Wasmtime の JIT 基盤として使われているプロダクトで、Rust から Cranelift IR を構築して各 CPU マシン語コードにコンパイルして実行することができる。オリジナルのコンパイラ言語を作る時にも活用できそうです。

逆ポーランド記法をパースして JIT で計算させる

Cargo.toml

[package]
name = "cranelift-example"
version = "0.1.0"
edition = "2021"

[dependencies]
cranelift = "0.117.0"
cranelift-codegen = "0.117.0"
cranelift-frontend = "0.117.0"
cranelift-jit = "0.117.0"
cranelift-module = "0.117.0"
libc = "0.2"

bin/rpn.rs

use cranelift::codegen::ir::{types, AbiParam, Function, InstBuilder, Value};
use cranelift::frontend::{FunctionBuilder, FunctionBuilderContext};
use cranelift_codegen::ir::UserFuncName;
use cranelift_jit::{JITBuilder, JITModule};
use cranelift_module::Module;
use std::collections::VecDeque;

/// Compiles a fixed RPN expression to native code with Cranelift and runs it.
///
/// Each token is translated straight into Cranelift IR while a build-time
/// stack of SSA values plays the role of the RPN operand stack. The generated
/// IR and the value returned by the JIT-compiled function are printed to
/// stdout.
///
/// # Panics
///
/// Panics if the expression is malformed (unknown token, too few operands, or
/// operands left over at the end) or if any Cranelift step fails.
fn main() {
    // RPN expression input
    let rpn_expression = "5 1 2 + 4 * + 3 -";

    // Create a JIT builder and module.
    let jit_builder = JITBuilder::new(cranelift_module::default_libcall_names())
        .expect("Failed to create JITBuilder");
    let mut module = JITModule::new(jit_builder);

    // Create the main function signature: no parameters, one i32 result.
    let mut ctx = module.make_context();
    let mut sig = module.make_signature();
    sig.returns.push(AbiParam::new(types::I32));
    let main_func = module
        .declare_function("main", cranelift_module::Linkage::Export, &sig)
        .unwrap();

    ctx.func = Function::with_name_signature(UserFuncName::user(0, 1), sig);
    let mut builder_ctx = FunctionBuilderContext::new();
    let mut builder = FunctionBuilder::new(&mut ctx.func, &mut builder_ctx);

    // Create the entry block.
    let entry_block = builder.create_block();
    builder.switch_to_block(entry_block);
    builder.seal_block(entry_block);

    // Build-time operand stack holding the SSA values produced so far.
    let mut stack: Vec<Value> = Vec::new();

    // Translate the RPN expression token by token.
    for token in rpn_expression.split_whitespace() {
        match token {
            "+" | "-" | "*" | "/" => {
                // The right-hand operand was pushed last, so it is popped first.
                let rhs = stack.pop().expect("Stack underflow");
                let lhs = stack.pop().expect("Stack underflow");
                let result = match token {
                    "+" => builder.ins().iadd(lhs, rhs),
                    "-" => builder.ins().isub(lhs, rhs),
                    "*" => builder.ins().imul(lhs, rhs),
                    _ => builder.ins().sdiv(lhs, rhs),
                };
                stack.push(result);
            }
            _ => {
                let value: i32 = token.parse().expect("Invalid token");
                let value = builder.ins().iconst(types::I32, i64::from(value));
                stack.push(value);
            }
        }
    }

    // The final result must be the only value left on the stack.
    let result = stack.pop().expect("No result on stack");
    assert!(
        stack.is_empty(),
        "Malformed RPN expression: operands left on the stack"
    );
    builder.ins().return_(&[result]);

    // Finalize the function.
    builder.finalize();

    println!("rpn_expression: {}", rpn_expression);
    println!("Compiled function: ");
    println!("{}", ctx.func.display());

    // Compile and run the function.
    module.define_function(main_func, &mut ctx).unwrap();
    module.clear_context(&mut ctx);
    module.finalize_definitions().expect("Failed to finalize definitions");

    let code_ptr = module.get_finalized_function(main_func);
    // SAFETY: `code_ptr` is the finalized code of `main_func`, which was declared
    // above with no parameters and a single i32 result, and `module` (which owns
    // the code) is still alive when the function is called.
    // NOTE(review): assumes the module's default calling convention is the
    // platform C ABI, hence `extern "C"` - confirm for the target in use.
    let code_fn = unsafe { std::mem::transmute::<_, extern "C" fn() -> i32>(code_ptr) };
    let result = code_fn();

    println!("Result: {}", result); // Expected output: Result: 14
}

生成される Cranelift IR と実行ログ:

$ cargo run --bin rpn
   Compiling cranelift-example v0.1.0 (/Users/hk2a/devel/rust/cranelift-example)
    Finished `dev` profile [unoptimized + debuginfo] target(s) in 1.20s
     Running `target/debug/rpn`
rpn_expression: 5 1 2 + 4 * + 3 -
Compiled function:
function u0:1() -> i32 system_v {
block0:
    v0 = iconst.i32 5
    v1 = iconst.i32 1
    v2 = iconst.i32 2
    v3 = iadd v1, v2  ; v1 = 1, v2 = 2
    v4 = iconst.i32 4
    v5 = imul v3, v4  ; v4 = 4
    v6 = iadd v0, v5  ; v0 = 5
    v7 = iconst.i32 3
    v8 = isub v6, v7  ; v7 = 3
    return v8
}

Result: 14

逆ポーランド記法をパースして JIT で計算させる(スタックスロット)

bin/rpn.rs

use cranelift::codegen::ir::{types, AbiParam, Function, InstBuilder, StackSlot, StackSlotData, StackSlotKind, Value};
use cranelift::frontend::{FunctionBuilder, FunctionBuilderContext};
use cranelift_codegen::ir::UserFuncName;
use cranelift_jit::{JITBuilder, JITModule};
use cranelift_module::Module;
use std::collections::VecDeque;

fn main() {
    // RPN expression input
    let rpn_expression = "5 1 2 + 4 * + 3 -";

    // Create a JIT builder and module.
    let jit_builder = JITBuilder::new(cranelift_module::default_libcall_names()).expect("Failed to create JITBuilder");
    let mut module = JITModule::new(jit_builder);

    // Create the main function signature.
    let mut ctx = module.make_context();
    let mut sig = module.make_signature();
    sig.returns.push(AbiParam::new(types::I32));
    let main_func = module.declare_function("main", cranelift_module::Linkage::Export, &sig).unwrap();

    ctx.func = Function::with_name_signature(UserFuncName::user(0, 1), sig);
    let mut builder_ctx = FunctionBuilderContext::new();
    let mut builder = FunctionBuilder::new(&mut ctx.func, &mut builder_ctx);

    // Create the entry block.
    let entry_block = builder.create_block();
    builder.switch_to_block(entry_block);
    builder.seal_block(entry_block);

    // Stack slot to hold values
    let stack_slot = builder.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 16, 0));
    let mut offset = 0;

    // Parse and evaluate the RPN expression
    let tokens: VecDeque<&str> = rpn_expression.split_whitespace().collect();
    for token in tokens {
        match token {
            "+" => {
                let b = pop(&mut builder, stack_slot, &mut offset);
                let a = pop(&mut builder, stack_slot, &mut offset);
                let result = builder.ins().iadd(a, b);
                push(&mut builder, stack_slot, &mut offset, result);
            }
            "-" => {
                let b = pop(&mut builder, stack_slot, &mut offset);
                let a = pop(&mut builder, stack_slot, &mut offset);
                let result = builder.ins().isub(a, b);
                push(&mut builder, stack_slot, &mut offset, result);
            }
            "*" => {
                let b = pop(&mut builder, stack_slot, &mut offset);
                let a = pop(&mut builder, stack_slot, &mut offset);
                let result = builder.ins().imul(a, b);
                push(&mut builder, stack_slot, &mut offset, result);
            }
            "/" => {
                let b = pop(&mut builder, stack_slot, &mut offset);
                let a = pop(&mut builder, stack_slot, &mut offset);
                let result = builder.ins().sdiv(a, b);
                push(&mut builder, stack_slot, &mut offset, result);
            }
            _ => {
                let value: i32 = token.parse().expect("Invalid token");
                let value = builder.ins().iconst(types::I32, value as i64);
                push(&mut builder, stack_slot, &mut offset, value);
            }
        }
    }

    // The final result should be the only value left on the stack
    let result = pop(&mut builder, stack_slot, &mut offset);
    builder.ins().return_(&[result]);

    // Finalize the function.
    builder.finalize();

    println!("rpn_expression: {}", rpn_expression);
    println!("Compiled function: ");
    println!("{}", ctx.func.display());

    // Compile and run the function.
    module.define_function(main_func, &mut ctx).unwrap();
    module.clear_context(&mut ctx);
    module.finalize_definitions().expect("Failed to finalize definitions");

    let code_ptr = module.get_finalized_function(main_func);
    let code_fn = unsafe { std::mem::transmute::<_, fn() -> i32>(code_ptr) };
    let result = code_fn();

    println!("Result: {}", result); // Expected output: Result: 14
}

// Helper functions for stack operations

/// Emits a store of `value` at byte `offset` inside `stack_slot`, then advances
/// `offset` by the size of one i32 entry.
///
/// # Panics
///
/// Panics if the entry would not fit inside the slot, instead of emitting a
/// store that writes past the end of it.
fn push(builder: &mut FunctionBuilder, stack_slot: StackSlot, offset: &mut i32, value: Value) {
    // Every stack entry is one i32.
    const ENTRY_BYTES: i32 = 4;

    // Size the slot was created with, as recorded in the function being built.
    let slot_bytes = i64::from(builder.func.sized_stack_slots[stack_slot].size);
    assert!(
        *offset >= 0 && i64::from(*offset) + i64::from(ENTRY_BYTES) <= slot_bytes,
        "Stack overflow: expression needs more than {} bytes of stack slot",
        slot_bytes
    );

    builder.ins().stack_store(value, stack_slot, *offset);
    *offset += ENTRY_BYTES;
}

/// Moves `offset` back by one i32 entry and emits a load of the value stored
/// there in `stack_slot`, returning the loaded SSA value.
///
/// # Panics
///
/// Panics with "Stack underflow" if there is no entry to pop, instead of
/// emitting a load from a negative offset.
fn pop(builder: &mut FunctionBuilder, stack_slot: StackSlot, offset: &mut i32) -> Value {
    // Every stack entry is one i32.
    const ENTRY_BYTES: i32 = 4;

    assert!(*offset >= ENTRY_BYTES, "Stack underflow");
    *offset -= ENTRY_BYTES;
    builder.ins().stack_load(types::I32, stack_slot, *offset)
}

生成される Cranelift IR と実行ログ:

$ cargo run --bin rpn
   Compiling cranelift-example v0.1.0 (/Users/hk2a/devel/rust/cranelift-example)
    Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.74s
     Running `target/debug/rpn`
rpn_expression: 5 1 2 + 4 * + 3 -
Compiled function:
function u0:1() -> i32 system_v {
    ss0 = explicit_slot 16

block0:
    v0 = iconst.i32 5
    stack_store v0, ss0  ; v0 = 5
    v1 = iconst.i32 1
    stack_store v1, ss0+4  ; v1 = 1
    v2 = iconst.i32 2
    stack_store v2, ss0+8  ; v2 = 2
    v3 = stack_load.i32 ss0+8
    v4 = stack_load.i32 ss0+4
    v5 = iadd v4, v3
    stack_store v5, ss0+4
    v6 = iconst.i32 4
    stack_store v6, ss0+8  ; v6 = 4
    v7 = stack_load.i32 ss0+8
    v8 = stack_load.i32 ss0+4
    v9 = imul v8, v7
    stack_store v9, ss0+4
    v10 = stack_load.i32 ss0+4
    v11 = stack_load.i32 ss0
    v12 = iadd v11, v10
    stack_store v12, ss0
    v13 = iconst.i32 3
    stack_store v13, ss0+4  ; v13 = 3
    v14 = stack_load.i32 ss0+4
    v15 = stack_load.i32 ss0
    v16 = isub v15, v14
    stack_store v16, ss0
    v17 = stack_load.i32 ss0
    return v17
}

Result: 14

システムコールする例

libc の printf をコールして Hello, World を出力。

bin/hello.rs

use cranelift::codegen::ir::{types, AbiParam, Function, InstBuilder};
use cranelift::frontend::{FunctionBuilder, FunctionBuilderContext};
use cranelift_codegen::ir::UserFuncName;
use cranelift_jit::{JITBuilder, JITModule};
use std::ffi::CString;
use cranelift_module::{Module, Linkage};
use libc;

/// JIT-compiles a function that calls `printf` with "Hello, World!\n" and runs it.
///
/// The local `printf` wrapper is registered with the JIT under the symbol name
/// "printf", imported into the module, and called from a generated `main`
/// function. The generated IR is printed before the code is executed.
///
/// # Panics
///
/// Panics if any Cranelift step fails.
fn main() {
    // Create a JIT builder and module.
    let mut jit_builder = JITBuilder::new(cranelift_module::default_libcall_names())
        .expect("Failed to create JITBuilder");
    // Make the local wrapper function resolvable under the name "printf".
    jit_builder.symbol("printf", printf as *const u8);
    let mut module = JITModule::new(jit_builder);

    // Create a function signature for `printf`.
    let mut ctx = module.make_context();
    let mut sig = module.make_signature();
    let pointer_type = module.target_config().pointer_type();
    sig.params.push(AbiParam::new(pointer_type)); // format string argument
    sig.params.push(AbiParam::new(pointer_type)); // dummy argument standing in for the variadic arguments
    sig.returns.push(AbiParam::new(types::I32));
    let printf = module.declare_function("printf", Linkage::Import, &sig).unwrap();

    // Create the main function signature: no parameters, one i32 result.
    let mut sig = module.make_signature();
    sig.returns.push(AbiParam::new(types::I32));
    let main_func = module.declare_function("main", Linkage::Export, &sig).unwrap();

    ctx.func = Function::with_name_signature(UserFuncName::user(0, 1), sig);
    let mut builder_ctx = FunctionBuilderContext::new();
    let mut builder = FunctionBuilder::new(&mut ctx.func, &mut builder_ctx);

    // Create the entry block.
    let entry_block = builder.create_block();
    builder.append_block_params_for_function_params(entry_block);
    builder.switch_to_block(entry_block);
    builder.seal_block(entry_block);

    // Create the string data. Its address is baked into the generated code as a
    // constant, so `hello_world` must stay alive until the code has run.
    let hello_world = CString::new("Hello, World!\n").unwrap();
    let hello_world_ptr = hello_world.as_ptr() as i64;

    // Call the `printf` function. The constants use `pointer_type` so that they
    // match the parameter types declared in the signature above.
    let printf_func = module.declare_func_in_func(printf, builder.func);
    let format_str = builder.ins().iconst(pointer_type, hello_world_ptr);
    let zero = builder.ins().iconst(pointer_type, 0);
    let call = builder.ins().call(printf_func, &[format_str, zero]);

    // Return the result of the call.
    let result = builder.inst_results(call)[0];
    builder.ins().return_(&[result]);

    builder.finalize();
    println!("{}", ctx.func.display());

    // Compile and run the function.
    module.define_function(main_func, &mut ctx).unwrap();
    module.clear_context(&mut ctx);
    module.finalize_definitions().expect("Failed to finalize definitions");

    let code_ptr = module.get_finalized_function(main_func);
    // SAFETY: `code_ptr` is the finalized code of `main_func`, which was declared
    // above with no parameters and a single i32 result; `module` (which owns the
    // code) and `hello_world` (whose address the code uses) are both still alive.
    // NOTE(review): assumes the module's default calling convention is the
    // platform C ABI, hence `extern "C"` - confirm for the target in use.
    let code_fn = unsafe { std::mem::transmute::<_, extern "C" fn() -> i32>(code_ptr) };
    code_fn();
}

// Dummy printf function to link with.
extern "C" fn printf(fmt: *const i8, _dummy: i64) -> i32 {
    unsafe {
        libc::printf(fmt)
    }
}

生成される Cranelift IR と実行ログ:

$ cargo run --bin hello
    Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.47s
     Running `target/debug/hello`
function u0:1() -> i32 system_v {
    sig0 = (i64, i64) -> i32 system_v
    fn0 = u0:0 sig0

block0:
    v0 = iconst.i64 0x6000_0258_0180
    v1 = iconst.i64 0
    v2 = call fn0(v0, v1)  ; v0 = 0x6000_0258_0180, v1 = 0
    return v2
}

Hello, World!