Functional visitors for a complex tree-shaped data structure

Let's consider this data type:

type e = | A | B of t | C of string * t * t | D of string | E of int
and  t = e list
[try]

If we want to write an abstraction to apply a function to this data structure (the one of type t) without being bothered to treat the whole structure everytime, we can get some inspiration from the visitor design pattern commonly used in object-oriented programming. The “problem” is that we don't have the same inheritance mechanism in functional programming, meaning that the default behaviour has to be implemented with functions instead of using methods that can be overriden in subclasses.

First attempt

The following implementation met my needs:

let rec visit f = function
  | [] -> []
  | (A | E _ | D _) as e :: tl ->
    begin match f e with
      | Some(l) -> l @ visit f tl
      | None -> e :: visit f tl
    end
  | B(t) as e :: tl ->
    begin match f e with
      | Some(l) -> l @ visit f tl
      | None -> B(visit f t) :: visit f tl
    end
  | C(s, t1, t2) as e :: tl ->
    begin match f e with
      | Some(l) -> l @ visit f tl
      | None -> C(s, visit f t1, visit f t2) :: visit f tl
    end
(* val visit : (e -> t option) -> t -> t *)
[try]

With this abstraction, it becomes very easy to write a function that removes all As:

let remove_A t = visit (function A -> Some [] | _ -> None) t
[try]

or all Bs:

let remove_B t = visit (function B _ -> Some [] | _ -> None) t
[try]

and it's also very easy to convert all Es to Ds:

let convert_Es_to_Ds t =
  visit (function E(i) -> Some [D(string_of_int i)] | _ -> None) t
[try]

Doing this with some fold abstraction is not suitable because we would need to do something like

let rec remove_B t =
  List.rev
    (fold
       (fun r ->
          function
          | B _ -> r
          | (A | E _ | D _) as e -> e::r
          | C(s, t1, t2) -> C(s, remove_B t1, remove_B t2))
       []
       t)
[try]

in which case what bothers me most is the line that has C(s, remove_B t1, remove_B t2) because it means that we still have to make trivial recursive calls that are just annoying to write (trivial code should be avoided whenever possible because it increases the chances to introduce nasty bugs).

What about genericity?

Well, perhaps we might want to have a visitor that doesn't always return a t. Can we make it generic using polymorphic abstractions? The implementation of visit returns a value, so if we want it to be able to return something else, we have to parameterise the default behaviour.

So, let's add a parameter to the function visit and see what it looks like. Well, let's call it glue since it's a function that glues 2 things together (if you find a better name, please let me know).

let rec visit f glue = function
  | [] -> []
  | (A | E _ | D _) as e :: tl ->
    begin match f e with
      | Some(l) -> glue l (visit f glue tl)
      | None -> glue [] (e :: visit f glue tl)
    end
  | B(t) as e :: tl ->
    begin match f e with
      | Some(l) -> glue l (visit f glue tl)
      | None -> glue [] (B(visit f glue t) :: visit f glue tl)
    end
  | C(s, t1, t2) as e :: tl ->
    begin match f e with
      | Some(l) -> glue l (visit f glue tl)
      | None ->
        glue []
          (C(s, visit f glue t1, visit f glue t2) :: visit f glue tl)
    end
(* val visit : (e -> 'a list option) -> ('a list -> t -> t) -> t -> t *)
[try]

We're almost there. There's still that 'a list, which is less generic than 'a. Well, glue needs to have 2 parameters because it has to be able to treat both branches of the pattern-matching filter, so we could make the first parameter optional. We could use optional arguments, and we can see that f already returns an option, can we just take advantage of that? Well, if we replace for instance

  | (A | E _ | D _) as e :: tl ->
    begin match f e with
      | Some(l) -> glue l (visit f glue tl)
      | None -> glue [] (e :: visit f glue tl)
    end
[try]

by

  | (A | E _ | D _) as e :: tl ->
      glue (f e) (visit f glue tl)
[try]

it does work for this branch but it doesn't for the others because one purpose of the visitor is to deeply traverse the data structure automatically. So we're back to optional arguments...

And so we could have that:

let rec visit f ?(glue=(fun ?l r -> match l with None -> r | Some l -> l @ r)) = function
  | [] -> []
  | (A | E _ | D _) as e :: tl ->
    begin match f e with
      | Some(l) -> glue ~l (visit f ~glue tl)
      | None -> glue (e :: visit f ~glue tl)
    end
  | B(t) as e :: tl ->
    begin match f e with
      | Some(l) -> glue ~l (visit f ~glue tl)
      | None -> glue (B(visit f ~glue t) :: visit f ~glue tl)
    end
  | C(s, t1, t2) as e :: tl ->
    begin match f e with
      | Some(l) -> glue ~l (visit f ~glue tl)
      | None ->
        glue 
          (C(s, visit f ~glue t1, visit f ~glue t2) :: visit f ~glue tl)
    end
(* val visit : (e -> t option) -> ?glue:(?l:t -> t -> t) -> t -> t *)
[try]

Then the problem is that it's not polymorphic any more, while we want it to be polymorphic! So let's drop the default value for the parameter glue.

let rec visit f (glue:(?l:'a-> t -> t)) = function
  | [] -> []
  | (A | E _ | D _) as e :: tl ->
    begin match f e with
      | Some(l) -> glue ~l (visit f glue tl)
      | None -> glue (e :: (visit f glue tl)
    end
  | B(t) as e :: tl ->
    begin match f e with
      | Some(l) -> glue ~l (visit f glue tl)
      | None -> glue (B(visit f glue t) :: visit f glue tl)
    end
  | C(s, t1, t2) as e :: tl ->
    begin match f e with
      | Some(l) -> glue ~l (visit f glue tl)
      | None ->
        glue 
          (C(s, visit f glue t1, visit f glue t2) :: visit f glue tl)
    end
(* val visit : (e -> 'a option) -> (?l:'a -> t -> t) -> t -> t *)
[try]

There we go, we have a generic visitor for structures of type t.

As we did before, we may now define remove_A, remove_B and convert_Es_to_Ds, using the following quite simple definition of glue.

let glue ?l t = match l with Some l -> l @ t | None -> t
[try]
let remove_A t = visit (function A -> Some [] | _ -> None) glue t
[try]
let remove_B t = visit (function B _ -> Some [] | _ -> None : e -> t option) glue t
[try]
let convert_Es_to_Ds t =
  visit (function E(i) -> Some [D(string_of_int i)] | _ -> None) glue t
[try]

We could actually make it more generic by allowing glue to return something else than a t, and we could have something resembling val visit : (e -> 'a option) -> (?l:'a -> t -> 'b) -> t -> 'b. Well, the problem is that we have to tell visit how to reconstruct a visited C. It would be easy to have a more generic version of visit of type val visit : (e -> 'a option) -> (?l:'a -> t -> ('b * t)) -> t -> ('b * t) but that's become far from simple to understand: it looks like some sort of visit and fold merged together. Hmmm... Let's not go that far, for now.


started on 2014-01-06 09:24:25+00:00, (re)generated on 2014-01-15 15:32:42+00:00

tags: • ocaml