diff --git a/CHANGES.md b/CHANGES.md index d91081c..d427672 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,7 +1,7 @@ ## v0.9.1 (27/03/2017) -- Switch from `base` to `base.sexplib0`, so that linking with - `sexplib` doesn't link all of `base` +- Re-import the code split out from Sexplib into Base as + sexplib0. Remove the Base dependency ## v0.9.0 diff --git a/sexplib.opam b/sexplib.opam index bd3e881..fde5e55 100644 --- a/sexplib.opam +++ b/sexplib.opam @@ -10,7 +10,6 @@ build: [ ["jbuilder" "build" "--only-packages" "sexplib" "--root" "." "-j" jobs "@install"] ] depends: [ - "base" {>= "v0.9.1" & < "v0.10"} "jbuilder" {build & >= "1.0+beta2"} ] available: [ ocaml-version >= "4.03.0" ] diff --git a/src/jbuild b/src/jbuild index fa3a81b..df07cb4 100644 --- a/src/jbuild +++ b/src/jbuild @@ -1,6 +1,6 @@ (library ((name sexplib) - (libraries (bigarray base.sexplib0)) + (libraries (bigarray sexplib0)) (public_name sexplib) (preprocess (per_file ((action (system "${PA_CPP} ${<}")) (pre_sexp)))) )) diff --git a/src0/jbuild b/src0/jbuild new file mode 100644 index 0000000..b9293d9 --- /dev/null +++ b/src0/jbuild @@ -0,0 +1,5 @@ +(jbuild_version 1) + +(library + ((name sexplib0) + (public_name sexplib.0))) diff --git a/src0/jbuild~ b/src0/jbuild~ new file mode 100644 index 0000000..87ffe8f --- /dev/null +++ b/src0/jbuild~ @@ -0,0 +1,5 @@ +(jbuild_version 1) + +(library + ((name sexplib0) + (public_name base.sexplib0))) diff --git a/src0/sexp.ml b/src0/sexp.ml new file mode 100644 index 0000000..ad9d280 --- /dev/null +++ b/src0/sexp.ml @@ -0,0 +1,286 @@ +(* We do not [open! Import] because [Sexp] is used in [Sexp_conv], which is used in + [Import]. *) + +[@@@ocaml.warning "-3"] + +open StdLabels +open Format + +(* Type of S-expressions *) +type t = Atom of string | List of t list + +let sexp_of_t t = t +let t_of_sexp t = t + +let equal a b = compare a b = 0 + +exception Of_sexp_error of exn * t + +module Printing = struct + (* Default indentation level for human-readable conversions *) + + let default_indent = ref 1 + + (* Escaping of strings used as atoms in S-expressions *) + + let must_escape str = + let len = String.length str in + len = 0 || + let rec loop ix = + match str.[ix] with + | '"' | '(' | ')' | ';' | '\\' -> true + | '|' -> ix > 0 && let next = ix - 1 in Char.equal str.[next] '#' || loop next + | '#' -> ix > 0 && let next = ix - 1 in Char.equal str.[next] '|' || loop next + | '\000' .. '\032' | '\127' .. '\255' -> true + | _ -> ix > 0 && loop (ix - 1) + in + loop (len - 1) + + let escaped s = + let open String in + let n = ref 0 in + for i = 0 to length s - 1 do + n := !n + + (match unsafe_get s i with + | '\"' | '\\' | '\n' | '\t' | '\r' | '\b' -> 2 + | ' ' .. '~' -> 1 + | _ -> 4) + done; + if !n = length s then copy s else begin + let s' = create !n in + n := 0; + for i = 0 to length s - 1 do + begin match unsafe_get s i with + | ('\"' | '\\') as c -> + unsafe_set s' !n '\\'; incr n; unsafe_set s' !n c + | '\n' -> + unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'n' + | '\t' -> + unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 't' + | '\r' -> + unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'r' + | '\b' -> + unsafe_set s' !n '\\'; incr n; unsafe_set s' !n 'b' + | (' ' .. '~') as c -> unsafe_set s' !n c + | c -> + let a = Char.code c in + unsafe_set s' !n '\\'; + incr n; + unsafe_set s' !n (Char.chr (48 + a / 100)); + incr n; + unsafe_set s' !n (Char.chr (48 + (a / 10) mod 10)); + incr n; + unsafe_set s' !n (Char.chr (48 + a mod 10)); + end; + incr n + done; + s' + end + + let esc_str str = + let estr = escaped str in + let elen = String.length estr in + let res = String.create (elen + 2) in + String.blit ~src:estr ~src_pos:0 ~dst:res ~dst_pos:1 ~len:elen; + res.[0] <- '"'; + res.[elen + 1] <- '"'; + res + + let index_of_newline str start = + try Some (String.index_from str start '\n') + with Not_found -> None + + let get_substring str index end_pos_opt = + let end_pos = + match end_pos_opt with + | None -> String.length str + | Some end_pos -> end_pos + in + String.sub str ~pos:index ~len:(end_pos - index) + + let is_one_line str = + match index_of_newline str 0 with + | None -> true + | Some index -> index + 1 = String.length str + + let pp_hum_maybe_esc_str ppf str = + if not (must_escape str) then + pp_print_string ppf str + else if is_one_line str then + pp_print_string ppf (esc_str str) + else begin + let rec loop index = + let next_newline = index_of_newline str index in + let next_line = get_substring str index next_newline in + pp_print_string ppf (escaped next_line); + match next_newline with + | None -> () + | Some newline_index -> + pp_print_string ppf "\\"; + pp_force_newline ppf (); + pp_print_string ppf "\\n"; + loop (newline_index + 1) + in + pp_open_box ppf 0; + (* the leading space is to line up the lines *) + pp_print_string ppf " \""; + loop 0; + pp_print_string ppf "\""; + pp_close_box ppf (); + end + + let mach_maybe_esc_str str = + if must_escape str then esc_str str else str + + (* Output of S-expressions to formatters *) + + let rec pp_hum_indent indent ppf = function + | Atom str -> pp_hum_maybe_esc_str ppf str + | List (h :: t) -> + pp_open_box ppf indent; + pp_print_string ppf "("; + pp_hum_indent indent ppf h; + pp_hum_rest indent ppf t + | List [] -> pp_print_string ppf "()" + + and pp_hum_rest indent ppf = function + | h :: t -> + pp_print_space ppf (); + pp_hum_indent indent ppf h; + pp_hum_rest indent ppf t + | [] -> + pp_print_string ppf ")"; + pp_close_box ppf () + + let rec pp_mach_internal may_need_space ppf = function + | Atom str -> + let str' = mach_maybe_esc_str str in + let new_may_need_space = str' == str in + if may_need_space && new_may_need_space then pp_print_string ppf " "; + pp_print_string ppf str'; + new_may_need_space + | List (h :: t) -> + pp_print_string ppf "("; + let may_need_space = pp_mach_internal false ppf h in + pp_mach_rest may_need_space ppf t; + false + | List [] -> pp_print_string ppf "()"; false + + and pp_mach_rest may_need_space ppf = function + | h :: t -> + let may_need_space = pp_mach_internal may_need_space ppf h in + pp_mach_rest may_need_space ppf t + | [] -> pp_print_string ppf ")" + + let pp_hum ppf sexp = pp_hum_indent !default_indent ppf sexp + + let pp_mach ppf sexp = ignore (pp_mach_internal false ppf sexp) + let pp = pp_mach + + (* Sexp size *) + + let rec size_loop (v, c as acc) = function + | Atom str -> v + 1, c + String.length str + | List lst -> List.fold_left lst ~init:acc ~f:size_loop + + let size sexp = size_loop (0, 0) sexp + + (* Buffer conversions *) + + let to_buffer_hum ~buf ?(indent = !default_indent) sexp = + let ppf = Format.formatter_of_buffer buf in + Format.fprintf ppf "%a@?" (pp_hum_indent indent) sexp + + let to_buffer_mach ~buf sexp = + let rec loop may_need_space = function + | Atom str -> + let str' = mach_maybe_esc_str str in + let new_may_need_space = str' == str in + if may_need_space && new_may_need_space then Buffer.add_char buf ' '; + Buffer.add_string buf str'; + new_may_need_space + | List (h :: t) -> + Buffer.add_char buf '('; + let may_need_space = loop false h in + loop_rest may_need_space t; + false + | List [] -> Buffer.add_string buf "()"; false + and loop_rest may_need_space = function + | h :: t -> + let may_need_space = loop may_need_space h in + loop_rest may_need_space t + | [] -> Buffer.add_char buf ')' in + ignore (loop false sexp) + + let to_buffer = to_buffer_mach + + let to_buffer_gen ~buf ~add_char ~add_string sexp = + let rec loop may_need_space = function + | Atom str -> + let str' = mach_maybe_esc_str str in + let new_may_need_space = str' == str in + if may_need_space && new_may_need_space then add_char buf ' '; + add_string buf str'; + new_may_need_space + | List (h :: t) -> + add_char buf '('; + let may_need_space = loop false h in + loop_rest may_need_space t; + false + | List [] -> add_string buf "()"; false + and loop_rest may_need_space = function + | h :: t -> + let may_need_space = loop may_need_space h in + loop_rest may_need_space t + | [] -> add_char buf ')' in + ignore (loop false sexp) + + (* The maximum size of a thing on the minor heap is 256 words. + Previously, this size of the returned buffer here was 4096 bytes, which + caused the Buffer to be allocated on the *major* heap every time. + + According to a simple benchmark by Ron, we can improve performance for + small s-expressions by a factor of ~4 if we only allocate 1024 bytes + (128 words + some small overhead) worth of buffer initially. And one + can argue that if it's free to allocate strings smaller than 256 words, + large s-expressions requiring larger expensive buffers won't notice + the extra two doublings from 1024 bytes to 2048 and 4096. And especially + performance-sensitive applications to always pass in a larger buffer to + use. *) + let buffer () = Buffer.create 1024 + + (* String conversions *) + + let to_string_hum ?indent = function + | Atom str when (match index_of_newline str 0 with None -> true | Some _ -> false) -> + mach_maybe_esc_str str + | sexp -> + let buf = buffer () in + to_buffer_hum ?indent sexp ~buf; + Buffer.contents buf + + let to_string_mach = function + | Atom str -> mach_maybe_esc_str str + | sexp -> + let buf = buffer () in + to_buffer_mach sexp ~buf; + Buffer.contents buf + + let to_string = to_string_mach +end +include Printing + +let of_float_style : [ `Underscores | `No_underscores ] ref = ref `No_underscores +let of_int_style : [ `Underscores | `No_underscores ] ref = ref `No_underscores + +module Private = Printing + +let message name fields = + let rec conv_fields = function + | [] -> [] + | (fname, fsexp) :: rest -> + match fname with + | "" -> fsexp :: conv_fields rest + | _ -> List [ Atom fname; fsexp ] :: conv_fields rest + in + List (Atom name :: conv_fields fields) diff --git a/src0/sexp.mli b/src0/sexp.mli new file mode 100644 index 0000000..969999d --- /dev/null +++ b/src0/sexp.mli @@ -0,0 +1,106 @@ +(** Type of S-expressions *) + +type t = Atom of string | List of t list + +(*_ We don't use [@@deriving_inline sexp][@@@end] as this would generated references to [Sexplib], + creating a circular dependency *) +val t_of_sexp : t -> t +val sexp_of_t : t -> t + +val equal : t -> t -> bool + +(** [Of_sexp_error (exn, sexp)] the exception raised when an S-expression could not be + successfully converted to an OCaml-value. *) +exception Of_sexp_error of exn * t + +(** {1 Helpers} *) + +(** Helper to build nice s-expressions for error messages. It imitates the behavior of + [[%message ...]] from the ppx_sexp_message rewriter. + + [message name key_values] produces a s-expression list starting with atom [name] and + followed by list of size 2 of the form [(key value)]. When the key is the empty + string, [value] is used directly instead as for [[%message]]. + + For instance the following code: + + {[ + Sexp.message "error" + [ "x", sexp_of_int 42 + ; "" , sexp_of_exn Exit + ] + ]} + + produces the s-expression: + + {[ + (error (x 42) Exit) + ]} *) +val message : string -> (string * t) list -> t + +(** {1 Defaults} *) + +(** [default_indent] reference to default indentation level for human-readable + conversions. + + Initialisation value: 2. *) +val default_indent : int ref + +(** {1 Pretty printing of S-expressions} *) + +(** [pp_hum ppf sexp] outputs S-expression [sexp] to formatter [ppf] in human readable + form. *) +val pp_hum : Format.formatter -> t -> unit + +(** [pp_hum_indent n ppf sexp] outputs S-expression [sexp] to formatter [ppf] in human + readable form and indentation level [n]. *) +val pp_hum_indent : int -> Format.formatter -> t -> unit + +(** [pp_mach ppf sexp] outputs S-expression [sexp] to formatter [ppf] in machine readable + (i.e. most compact) form. *) +val pp_mach : Format.formatter -> t -> unit + +(** Same as [pp_mach]. *) +val pp : Format.formatter -> t -> unit + +(** {1 Conversion to strings} *) + +(** [to_string_hum ?indent sexp] converts S-expression [sexp] to a + string in human readable form with indentation level [indent]. + + @param indent default = [!default_indent] *) +val to_string_hum : ?indent : int -> t -> string + +(** [to_string_mach sexp] converts S-expression [sexp] to a string in + machine readable (i.e. most compact) form. *) +val to_string_mach : t -> string + +(** Same as [to_string_mach]. *) +val to_string : t -> string + +(** {1 Styles} *) + +val of_float_style : [ `Underscores | `No_underscores ] ref +val of_int_style : [ `Underscores | `No_underscores ] ref + +module Private : sig + (*_ Exported for sexplib *) + + val size : t -> int * int + + val buffer : unit -> Buffer.t + + val to_buffer : buf:Buffer.t -> t -> unit + val to_buffer_hum : buf:Buffer.t -> ?indent:int -> t -> unit + val to_buffer_mach : buf:Buffer.t -> t -> unit + val to_buffer_gen + : buf : 'buffer + -> add_char : ('buffer -> char -> unit) + -> add_string : ('buffer -> string -> unit) + -> t + -> unit + + val mach_maybe_esc_str : string -> string + val must_escape : string -> bool + val esc_str : string -> string +end diff --git a/src0/sexp_conv.ml b/src0/sexp_conv.ml new file mode 100644 index 0000000..2629120 --- /dev/null +++ b/src0/sexp_conv.ml @@ -0,0 +1,438 @@ +(* Utility Module for S-expression Conversions *) + +open StdLabels +open MoreLabels +open Printf +open Sexp + +[@@@ocaml.warning "-3"] + +type sexp_bool = bool +type 'a sexp_option = 'a option +type 'a sexp_list = 'a list +type 'a sexp_array = 'a array +type 'a sexp_opaque = 'a + +(* Conversion of OCaml-values to S-expressions *) + +external format_float : string -> float -> string = "caml_format_float" + +(* '%.17g' is guaranteed to be round-trippable. + + '%.15g' will be round-trippable and not have noise at the last digit or two for a float + which was converted from a decimal (string) with <= 15 significant digits. So it's + worth trying first to avoid things like "3.1400000000000001". + + See comment above [to_string_round_trippable] in {!Core_kernel.Float} for + detailed explanation and examples. *) +let default_string_of_float = + ref (fun x -> + let y = format_float "%.15G" x in + if float_of_string y = x then + y + else + format_float "%.17G" x) +;; + +let read_old_option_format = ref true +let write_old_option_format = ref true + +let list_map f l = List.rev (List.rev_map l ~f) + +let sexp_of_unit () = List [] +let sexp_of_bool b = Atom (string_of_bool b) +let sexp_of_string str = Atom str +let sexp_of_char c = Atom (String.make 1 c) +let sexp_of_int n = Atom (string_of_int n) +let sexp_of_float n = Atom (!default_string_of_float n) +let sexp_of_int32 n = Atom (Int32.to_string n) +let sexp_of_int64 n = Atom (Int64.to_string n) +let sexp_of_nativeint n = Atom (Nativeint.to_string n) +let sexp_of_ref sexp_of__a rf = sexp_of__a !rf +let sexp_of_lazy_t sexp_of__a lv = sexp_of__a (Lazy.force lv) + +let sexp_of_option sexp_of__a = function + | Some x when !write_old_option_format -> List [sexp_of__a x] + | Some x -> List [Atom "some"; sexp_of__a x] + | None when !write_old_option_format -> List [] + | None -> Atom "none" + +let sexp_of_pair sexp_of__a sexp_of__b (a, b) = + List [sexp_of__a a; sexp_of__b b] + +let sexp_of_triple sexp_of__a sexp_of__b sexp_of__c (a, b, c) = + List [sexp_of__a a; sexp_of__b b; sexp_of__c c] + +(* List.rev (List.rev_map ...) is tail recursive, the OCaml standard + library List.map is NOT. *) +let sexp_of_list sexp_of__a lst = List (List.rev (List.rev_map lst ~f:sexp_of__a)) + +let sexp_of_array sexp_of__a ar = + let lst_ref = ref [] in + for i = Array.length ar - 1 downto 0 do + lst_ref := sexp_of__a ar.(i) :: !lst_ref + done; + List !lst_ref + +let sexp_of_hashtbl sexp_of_key sexp_of_val htbl = + let coll ~key:k ~data:v acc = List [sexp_of_key k; sexp_of_val v] :: acc in + List (Hashtbl.fold htbl ~init:[] ~f:coll) + +let sexp_of_opaque _ = Atom "" +let sexp_of_fun _ = Atom "" + + +(* Exception converter registration and lookup *) + +module Exn_converter = struct + (* These exception registration functions assume that context-switches + cannot happen unless there is an allocation. It is reasonable to expect + that this will remain true for the foreseeable future. That way we + avoid using mutexes and thus a dependency on the threads library. *) + + (* Fast and automatic exception registration *) + + module Int = struct + type t = int + + let compare t1 t2 = Pervasives.compare (t1 : int) t2 + end + + module Exn_ids = Map.Make (Int) + + let exn_id_map + : (extension_constructor, exn -> Sexp.t) Ephemeron.K1.t Exn_ids.t ref = + ref Exn_ids.empty + + (* [Obj.extension_id] works on both the exception itself, and the extension slot of the + exception. *) + let rec clean_up_handler (slot : extension_constructor) = + let id = Obj.extension_id slot in + let old_exn_id_map = !exn_id_map in + let new_exn_id_map = Exn_ids.remove id old_exn_id_map in + (* This trick avoids mutexes and should be fairly efficient *) + if !exn_id_map != old_exn_id_map then + clean_up_handler slot + else + exn_id_map := new_exn_id_map + + (* Ephemerons are used so that [sexp_of_exn] closure don't keep the + extension_constructor live. *) + let add ?(finalise = true) extension_constructor sexp_of_exn = + let id = Obj.extension_id extension_constructor in + let rec loop () = + let old_exn_id_map = !exn_id_map in + let ephe = Ephemeron.K1.create () in + Ephemeron.K1.set_data ephe sexp_of_exn; + Ephemeron.K1.set_key ephe extension_constructor; + let new_exn_id_map = Exn_ids.add old_exn_id_map ~key:id ~data:ephe in + (* This trick avoids mutexes and should be fairly efficient *) + if !exn_id_map != old_exn_id_map then + loop () + else begin + exn_id_map := new_exn_id_map; + if finalise then + try + Gc.finalise clean_up_handler extension_constructor + with Invalid_argument _ -> + (* Pre-allocated extension constructors cannot be finalised *) + () + end + in + loop () + + let add_auto ?finalise exn sexp_of_exn = + add ?finalise (Obj.extension_constructor exn) sexp_of_exn + + let find_auto exn = + let id = Obj.extension_id (Obj.extension_constructor exn) in + match Exn_ids.find id !exn_id_map with + | exception Not_found -> None + | ephe -> + match Ephemeron.K1.get_data ephe with + | None -> None + | Some sexp_of_exn -> Some (sexp_of_exn exn) + + + module For_unit_tests_only = struct + let size () = Exn_ids.fold !exn_id_map ~init:0 ~f:(fun ~key:_ ~data:ephe acc -> + match Ephemeron.K1.get_data ephe with + | None -> acc + | Some _ -> acc + 1 + ) + end + +end + +let sexp_of_exn_opt exn = Exn_converter.find_auto exn + + +let sexp_of_exn exn = + match sexp_of_exn_opt exn with + | None -> List [Atom (Printexc.to_string exn)] + | Some sexp -> sexp + +let exn_to_string e = Sexp.to_string_hum (sexp_of_exn e) + +(* {[exception Blah [@@deriving_inline sexp][@@@end]]} generates a call to the function + [Exn_converter.add] defined in this file. So we are guaranted that as soon as we + mark an exception as sexpable, this module will be linked in and this printer will be + registered, which is what we want. *) +let () = + Printexc.register_printer (fun exn -> + match sexp_of_exn_opt exn with + | None -> None + | Some sexp -> + Some (Sexp.to_string_hum ~indent:2 sexp)) + +(* Conversion of S-expressions to OCaml-values *) + +exception Of_sexp_error = Sexp.Of_sexp_error + +let record_check_extra_fields = ref true + +let of_sexp_error_exn exc sexp = raise (Of_sexp_error (exc, sexp)) + +let of_sexp_error what sexp = raise (Of_sexp_error (Failure what, sexp)) + +let unit_of_sexp sexp = match sexp with + | List [] -> () + | Atom _ | List _ -> of_sexp_error "unit_of_sexp: empty list needed" sexp + +let bool_of_sexp sexp = match sexp with + | Atom ("true" | "True") -> true + | Atom ("false" | "False") -> false + | Atom _ -> of_sexp_error "bool_of_sexp: unknown string" sexp + | List _ -> of_sexp_error "bool_of_sexp: atom needed" sexp + +let string_of_sexp sexp = match sexp with + | Atom str -> str + | List _ -> of_sexp_error "string_of_sexp: atom needed" sexp + +let char_of_sexp sexp = match sexp with + | Atom str -> + if String.length str <> 1 then + of_sexp_error + "char_of_sexp: atom string must contain one character only" sexp; + str.[0] + | List _ -> of_sexp_error "char_of_sexp: atom needed" sexp + +let int_of_sexp sexp = match sexp with + | Atom str -> + (try int_of_string str + with exc -> of_sexp_error ("int_of_sexp: " ^ exn_to_string exc) sexp) + | List _ -> of_sexp_error "int_of_sexp: atom needed" sexp + +let float_of_sexp sexp = match sexp with + | Atom str -> + (try float_of_string str + with exc -> + of_sexp_error ("float_of_sexp: " ^ exn_to_string exc) sexp) + | List _ -> of_sexp_error "float_of_sexp: atom needed" sexp + +let int32_of_sexp sexp = match sexp with + | Atom str -> + (try Int32.of_string str + with exc -> + of_sexp_error ("int32_of_sexp: " ^ exn_to_string exc) sexp) + | List _ -> of_sexp_error "int32_of_sexp: atom needed" sexp + +let int64_of_sexp sexp = match sexp with + | Atom str -> + (try Int64.of_string str + with exc -> + of_sexp_error ("int64_of_sexp: " ^ exn_to_string exc) sexp) + | List _ -> of_sexp_error "int64_of_sexp: atom needed" sexp + +let nativeint_of_sexp sexp = match sexp with + | Atom str -> + (try Nativeint.of_string str + with exc -> + of_sexp_error ("nativeint_of_sexp: " ^ exn_to_string exc) sexp) + | List _ -> of_sexp_error "nativeint_of_sexp: atom needed" sexp + +let ref_of_sexp a__of_sexp sexp = ref (a__of_sexp sexp) +let lazy_t_of_sexp a__of_sexp sexp = Lazy.from_val (a__of_sexp sexp) + +let option_of_sexp a__of_sexp sexp = + if !read_old_option_format then + match sexp with + | List [] | Atom ("none" | "None") -> None + | List [el] | List [Atom ("some" | "Some"); el] -> Some (a__of_sexp el) + | List _ -> + of_sexp_error "option_of_sexp: list must represent optional value" sexp + | Atom _ -> of_sexp_error "option_of_sexp: only none can be atom" sexp + else + match sexp with + | Atom ("none" | "None") -> None + | List [Atom ("some" | "Some"); el] -> Some (a__of_sexp el) + | Atom _ -> of_sexp_error "option_of_sexp: only none can be atom" sexp + | List _ -> of_sexp_error "option_of_sexp: list must be (some el)" sexp + +let pair_of_sexp a__of_sexp b__of_sexp sexp = match sexp with + | List [a_sexp; b_sexp] -> + let a = a__of_sexp a_sexp in + let b = b__of_sexp b_sexp in + a, b + | List _ -> + of_sexp_error + "pair_of_sexp: list must contain exactly two elements only" sexp + | Atom _ -> of_sexp_error "pair_of_sexp: list needed" sexp + +let triple_of_sexp a__of_sexp b__of_sexp c__of_sexp sexp = match sexp with + | List [a_sexp; b_sexp; c_sexp] -> + let a = a__of_sexp a_sexp in + let b = b__of_sexp b_sexp in + let c = c__of_sexp c_sexp in + a, b, c + | List _ -> + of_sexp_error + "triple_of_sexp: list must contain exactly three elements only" sexp + | Atom _ -> of_sexp_error "triple_of_sexp: list needed" sexp + +let list_of_sexp a__of_sexp sexp = match sexp with + | List lst -> + let rev_lst = List.rev_map lst ~f:a__of_sexp in + List.rev rev_lst + | Atom _ -> of_sexp_error "list_of_sexp: list needed" sexp + +let array_of_sexp a__of_sexp sexp = match sexp with + | List [] -> [||] + | List (h :: t) -> + let len = List.length t + 1 in + let res = Array.create len (a__of_sexp h) in + let rec loop i = function + | [] -> res + | h :: t -> res.(i) <- a__of_sexp h; loop (i + 1) t in + loop 1 t + | Atom _ -> of_sexp_error "array_of_sexp: list needed" sexp + +let hashtbl_of_sexp key_of_sexp val_of_sexp sexp = match sexp with + | List lst -> + let htbl = Hashtbl.create 0 in + let act = function + | List [k_sexp; v_sexp] -> + Hashtbl.add htbl ~key:(key_of_sexp k_sexp) ~data:(val_of_sexp v_sexp) + | List _ | Atom _ -> + of_sexp_error "hashtbl_of_sexp: tuple list needed" sexp + in + List.iter lst ~f:act; + htbl + | Atom _ -> of_sexp_error "hashtbl_of_sexp: list needed" sexp + +let opaque_of_sexp sexp = + of_sexp_error "opaque_of_sexp: cannot convert opaque values" sexp + +let fun_of_sexp sexp = + of_sexp_error "fun_of_sexp: cannot convert function values" sexp + +(* Registering default exception printers *) + +let get_flc_error name (file, line, chr) = + Atom (sprintf "%s %s:%d:%d" name file line chr) + +let () = + List.iter + ~f:(fun (extension_constructor, handler) -> Exn_converter.add ~finalise:false extension_constructor handler) + [ + ( + [%extension_constructor Assert_failure], + (function + | Assert_failure arg -> get_flc_error "Assert_failure" arg + | _ -> assert false) + );( + [%extension_constructor Exit], + (function + | Exit -> Atom "Exit" + | _ -> assert false) + );( + [%extension_constructor End_of_file], + (function + | End_of_file -> Atom "End_of_file" + | _ -> assert false) + );( + [%extension_constructor Failure], + (function + | Failure arg -> List [Atom "Failure"; Atom arg ] + | _ -> assert false) + );( + [%extension_constructor Not_found], + (function + | Not_found -> Atom "Not_found" + | _ -> assert false) + );( + [%extension_constructor Invalid_argument], + (function + | Invalid_argument arg -> List [Atom "Invalid_argument"; Atom arg ] + | _ -> assert false) + );( + [%extension_constructor Match_failure], + (function + | Match_failure arg -> get_flc_error "Match_failure" arg + | _ -> assert false) + );( + [%extension_constructor Sys_error], + (function + | Sys_error arg -> List [Atom "Sys_error"; Atom arg ] + | _ -> assert false) + );( + [%extension_constructor Arg.Help], + (function + | Arg.Help arg -> List [Atom "Arg.Help"; Atom arg ] + | _ -> assert false) + );( + [%extension_constructor Arg.Bad], + (function + | Arg.Bad arg -> List [Atom "Arg.Bad"; Atom arg ] + | _ -> assert false) + );( + [%extension_constructor Lazy.Undefined], + (function + | Lazy.Undefined -> Atom "Lazy.Undefined" + | _ -> assert false) + );( + [%extension_constructor Parsing.Parse_error], + (function + | Parsing.Parse_error -> Atom "Parsing.Parse_error" + | _ -> assert false) + );( + [%extension_constructor Queue.Empty], + (function + | Queue.Empty -> Atom "Queue.Empty" + | _ -> assert false) + );( + [%extension_constructor Scanf.Scan_failure], + (function + | Scanf.Scan_failure arg -> List [Atom "Scanf.Scan_failure"; Atom arg ] + | _ -> assert false) + );( + [%extension_constructor Stack.Empty], + (function + | Stack.Empty -> Atom "Stack.Empty" + | _ -> assert false) + );( + [%extension_constructor Stream.Failure], + (function + | Stream.Failure -> Atom "Stream.Failure" + | _ -> assert false) + );( + [%extension_constructor Stream.Error], + (function + | Stream.Error arg -> List [Atom "Stream.Error"; Atom arg ] + | _ -> assert false) + );( + [%extension_constructor Sys.Break], + (function + | Sys.Break -> Atom "Sys.Break" + | _ -> assert false) + );( + [%extension_constructor Of_sexp_error], + (function + | Of_sexp_error (exc, sexp) -> + List [Atom "Sexplib.Conv.Of_sexp_error"; sexp_of_exn exc; sexp] + | _ -> assert false) + ); + ] + +external ignore : _ -> unit = "%ignore" +external ( = ) : 'a -> 'a -> bool = "%equal" diff --git a/src0/sexp_conv.mli b/src0/sexp_conv.mli new file mode 100644 index 0000000..1b0be64 --- /dev/null +++ b/src0/sexp_conv.mli @@ -0,0 +1,272 @@ +(** Utility Module for S-expression Conversions *) + +(** Dummy definitions for "optional" options, lists, and for opaque types *) +type sexp_bool = bool +type 'a sexp_option = 'a option +type 'a sexp_list = 'a list +type 'a sexp_array = 'a array +type 'a sexp_opaque = 'a + +(** {6 Conversion of OCaml-values to S-expressions} *) + +val default_string_of_float : (float -> string) ref +(** [default_string_of_float] reference to the default function used + to convert floats to strings. + + Initially set to [fun n -> sprintf "%.20G" n]. *) + +val write_old_option_format : bool ref +(** [write_old_option_format] reference for the default option format + used to write option values. If set to [true], the old-style option + format will be used, the new-style one otherwise. + + Initially set to [true]. *) + + +val read_old_option_format : bool ref +(** [read_old_option_format] reference for the default option format + used to read option values. [Of_sexp_error] will be raised + with old-style option values if this reference is set to [false]. + Reading new-style option values is always supported. Using a global + reference instead of changing the converter calling conventions is + the only way to avoid breaking old code with the standard macros. + + Initially set to [true]. *) + +(** We re-export a tail recursive map function, because some modules + override the standard library functions (e.g. [StdLabels]) which + wrecks havoc with the camlp4 extension. *) +val list_map : ('a -> 'b) -> 'a list -> 'b list + +val sexp_of_unit : unit -> Sexp.t +(** [sexp_of_unit ()] converts a value of type [unit] to an S-expression. *) + +val sexp_of_bool : bool -> Sexp.t +(** [sexp_of_bool b] converts the value [x] of type [bool] to an + S-expression. *) + +val sexp_of_string : string -> Sexp.t +(** [sexp_of_bool str] converts the value [str] of type [string] to an + S-expression. *) + +val sexp_of_char : char -> Sexp.t +(** [sexp_of_char c] converts the value [c] of type [char] to an + S-expression. *) + +val sexp_of_int : int -> Sexp.t +(** [sexp_of_int n] converts the value [n] of type [int] to an + S-expression. *) + +val sexp_of_float : float -> Sexp.t +(** [sexp_of_float n] converts the value [n] of type [float] to an + S-expression. *) + +val sexp_of_int32 : int32 -> Sexp.t +(** [sexp_of_int32 n] converts the value [n] of type [int32] to an + S-expression. *) + +val sexp_of_int64 : int64 -> Sexp.t +(** [sexp_of_int64 n] converts the value [n] of type [int64] to an + S-expression. *) + +val sexp_of_nativeint : nativeint -> Sexp.t +(** [sexp_of_nativeint n] converts the value [n] of type [nativeint] to an + S-expression. *) + +val sexp_of_ref : ('a -> Sexp.t) -> 'a ref -> Sexp.t +(** [sexp_of_ref conv r] converts the value [r] of type ['a ref] to + an S-expression. Uses [conv] to convert values of type ['a] to an + S-expression. *) + +val sexp_of_lazy_t : ('a -> Sexp.t) -> 'a lazy_t -> Sexp.t +(** [sexp_of_lazy_t conv l] converts the value [l] of type ['a lazy_t] to + an S-expression. Uses [conv] to convert values of type ['a] to an + S-expression. *) + +val sexp_of_option : ('a -> Sexp.t) -> 'a option -> Sexp.t +(** [sexp_of_option conv opt] converts the value [opt] of type ['a + option] to an S-expression. Uses [conv] to convert values of type + ['a] to an S-expression. *) + +val sexp_of_pair : ('a -> Sexp.t) -> ('b -> Sexp.t) -> 'a * 'b -> Sexp.t +(** [sexp_of_pair conv1 conv2 pair] converts a pair to an S-expression. + It uses its first argument to convert the first element of the pair, + and its second argument to convert the second element of the pair. *) + +val sexp_of_triple : + ('a -> Sexp.t) -> ('b -> Sexp.t) -> ('c -> Sexp.t) -> 'a * 'b * 'c -> Sexp.t +(** [sexp_of_triple conv1 conv2 conv3 triple] converts a triple to + an S-expression using [conv1], [conv2], and [conv3] to convert its + elements. *) + +val sexp_of_list : ('a -> Sexp.t) -> 'a list -> Sexp.t +(** [sexp_of_list conv lst] converts the value [lst] of type ['a + list] to an S-expression. Uses [conv] to convert values of type + ['a] to an S-expression. *) + +val sexp_of_array : ('a -> Sexp.t) -> 'a array -> Sexp.t +(** [sexp_of_array conv ar] converts the value [ar] of type ['a + array] to an S-expression. Uses [conv] to convert values of type + ['a] to an S-expression. *) + +val sexp_of_hashtbl : + ('a -> Sexp.t) -> ('b -> Sexp.t) -> ('a, 'b) Hashtbl.t -> Sexp.t +(** [sexp_of_hashtbl conv_key conv_value htbl] converts the value [htbl] + of type [('a, 'b) Hashtbl.t] to an S-expression. Uses [conv_key] + to convert the hashtable keys of type ['a], and [conv_value] to + convert hashtable values of type ['b] to S-expressions. *) + +val sexp_of_opaque : 'a -> Sexp.t +(** [sexp_of_opaque x] converts the value [x] of opaque type to an + S-expression. This means the user need not provide converters, + but the result cannot be interpreted. *) + +val sexp_of_fun : ('a -> 'b) -> Sexp.t +(** [sexp_of_fun f] converts the value [f] of function type to a + dummy S-expression. Functions cannot be serialized as S-expressions, + but at least a placeholder can be generated for pretty-printing. *) + + +(** {6 Conversion of S-expressions to OCaml-values} *) + +exception Of_sexp_error of exn * Sexp.t +(** [Of_sexp_error (exn, sexp)] the exception raised when an S-expression + could not be successfully converted to an OCaml-value. *) + +val record_check_extra_fields : bool ref +(** [record_check_extra_fields] checks for extra (= unknown) fields + in record S-expressions. *) + +val of_sexp_error : string -> Sexp.t -> 'a +(** [of_sexp_error reason sexp] @raise Of_sexp_error (Failure reason, sexp). *) + +val of_sexp_error_exn : exn -> Sexp.t -> 'a +(** [of_sexp_error exc sexp] @raise Of_sexp_error (exc, sexp). *) + +val unit_of_sexp : Sexp.t -> unit +(** [unit_of_sexp sexp] converts S-expression [sexp] to a value of type + [unit]. *) + +val bool_of_sexp : Sexp.t -> bool +(** [bool_of_sexp sexp] converts S-expression [sexp] to a value of type + [bool]. *) + +val string_of_sexp : Sexp.t -> string +(** [string_of_sexp sexp] converts S-expression [sexp] to a value of type + [string]. *) + +val char_of_sexp : Sexp.t -> char +(** [char_of_sexp sexp] converts S-expression [sexp] to a value of type + [char]. *) + +val int_of_sexp : Sexp.t -> int +(** [int_of_sexp sexp] converts S-expression [sexp] to a value of type + [int]. *) + +val float_of_sexp : Sexp.t -> float +(** [float_of_sexp sexp] converts S-expression [sexp] to a value of type + [float]. *) + +val int32_of_sexp : Sexp.t -> int32 +(** [int32_of_sexp sexp] converts S-expression [sexp] to a value of type + [int32]. *) + +val int64_of_sexp : Sexp.t -> int64 +(** [int64_of_sexp sexp] converts S-expression [sexp] to a value of type + [int64]. *) + +val nativeint_of_sexp : Sexp.t -> nativeint +(** [nativeint_of_sexp sexp] converts S-expression [sexp] to a value + of type [nativeint]. *) + +val ref_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a ref +(** [ref_of_sexp conv sexp] converts S-expression [sexp] to a value + of type ['a ref] using conversion function [conv], which converts + an S-expression to a value of type ['a]. *) + +val lazy_t_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a lazy_t +(** [lazy_t_of_sexp conv sexp] converts S-expression [sexp] to a value + of type ['a lazy_t] using conversion function [conv], which converts + an S-expression to a value of type ['a]. *) + +val option_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a option +(** [option_of_sexp conv sexp] converts S-expression [sexp] to a value + of type ['a option] using conversion function [conv], which converts + an S-expression to a value of type ['a]. *) + +val pair_of_sexp : (Sexp.t -> 'a) -> (Sexp.t -> 'b) -> Sexp.t -> 'a * 'b +(** [pair_of_sexp conv1 conv2 sexp] converts S-expression [sexp] to a pair + of type ['a * 'b] using conversion functions [conv1] and [conv2], + which convert S-expressions to values of type ['a] and ['b] + respectively. *) + +val triple_of_sexp : + (Sexp.t -> 'a) -> (Sexp.t -> 'b) -> (Sexp.t -> 'c) -> Sexp.t -> 'a * 'b * 'c +(** [triple_of_sexp conv1 conv2 conv3 sexp] converts S-expression [sexp] + to a triple of type ['a * 'b * 'c] using conversion functions [conv1], + [conv2], and [conv3], which convert S-expressions to values of type + ['a], ['b], and ['c] respectively. *) + +val list_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a list +(** [list_of_sexp conv sexp] converts S-expression [sexp] to a value + of type ['a list] using conversion function [conv], which converts + an S-expression to a value of type ['a]. *) + +val array_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a array +(** [array_of_sexp conv sexp] converts S-expression [sexp] to a value + of type ['a array] using conversion function [conv], which converts + an S-expression to a value of type ['a]. *) + +val hashtbl_of_sexp : + (Sexp.t -> 'a) -> (Sexp.t -> 'b) -> Sexp.t -> ('a, 'b) Hashtbl.t +(** [hashtbl_of_sexp conv_key conv_value sexp] converts S-expression + [sexp] to a value of type [('a, 'b) Hashtbl.t] using conversion + function [conv_key], which converts an S-expression to hashtable + key of type ['a], and function [conv_value], which converts an + S-expression to hashtable value of type ['b]. *) + +val opaque_of_sexp : Sexp.t -> 'a +(** [opaque_of_sexp sexp] @raise Of_sexp_error when attempting to + convert an S-expression to an opaque value. *) + +val fun_of_sexp : Sexp.t -> 'a +(** [fun_of_sexp sexp] @raise Of_sexp_error when attempting to + convert an S-expression to a function. *) + + +(** Exception converters *) + +val sexp_of_exn : exn -> Sexp.t +(** [sexp_of_exn exc] converts exception [exc] to an S-expression. + If no suitable converter is found, the standard converter in + [Printexc] will be used to generate an atomic S-expression. *) + +val sexp_of_exn_opt : exn -> Sexp.t option +(** [sexp_of_exn_opt exc] converts exception [exc] to [Some sexp]. + If no suitable converter is found, [None] is returned instead. *) + +module Exn_converter : sig + val add_auto : ?finalise : bool -> exn -> (exn -> Sexp.t) -> unit + [@@deprecated "[since 2016-07] use Conv.Exn_converter.add"] + + val add : ?finalise : bool -> extension_constructor -> (exn -> Sexp.t) -> unit + (** [add ?finalise constructor sexp_of_exn] registers exception S-expression + converter [sexp_of_exn] for exceptions with the given [constructor]. + + NOTE: If [finalise] is [true], then the exception will be automatically + registered for removal with the GC (default). Finalisation will not work + with exceptions that have been allocated outside the heap, which is the + case for some standard ones e.g. [Sys_error]. + + @param finalise default = [true] *) + + module For_unit_tests_only : sig + val size : unit -> int + end +end + +(**/**) +(*_ For the syntax extension *) +external ignore : _ -> unit = "%ignore" +external ( = ) : 'a -> 'a -> bool = "%equal" + diff --git a/src0/sexp_conv_error.ml b/src0/sexp_conv_error.ml new file mode 100644 index 0000000..6c087fa --- /dev/null +++ b/src0/sexp_conv_error.ml @@ -0,0 +1,122 @@ +(* Conv_error: Module for Handling Errors during Automated S-expression + Conversions *) + +open StdLabels +open Printf +open Sexp_conv + +(* Errors concerning tuples *) + +let tuple_of_size_n_expected loc n sexp = + of_sexp_error (sprintf "%s_of_sexp: tuple of size %d expected" loc n) sexp + + +(* Errors concerning sum types *) + +let stag_no_args loc sexp = + of_sexp_error (loc ^ "_of_sexp: sum tag does not take arguments") sexp + +let stag_incorrect_n_args loc tag sexp = + let msg = + sprintf "%s_of_sexp: sum tag %S has incorrect number of arguments" loc tag + in + of_sexp_error msg sexp + +let stag_takes_args loc sexp = + of_sexp_error (loc ^ "_of_sexp: sum tag must be a structured value") sexp + +let nested_list_invalid_sum loc sexp = + of_sexp_error (loc ^ "_of_sexp: a nested list is an invalid sum") sexp + +let empty_list_invalid_sum loc sexp = + of_sexp_error (loc ^ "_of_sexp: the empty list is an invalid sum") sexp + +let unexpected_stag loc sexp = + of_sexp_error (loc ^ "_of_sexp: unexpected sum tag") sexp + + +(* Errors concerning records *) + +let record_only_pairs_expected loc sexp = + let msg = + loc ^ + "_of_sexp: record conversion: only pairs expected, \ + their first element must be an atom" in + of_sexp_error msg sexp + +let record_superfluous_fields ~what ~loc rev_fld_names sexp = + let fld_names_str = String.concat (List.rev rev_fld_names) ~sep:" " in + let msg = sprintf "%s_of_sexp: %s: %s" loc what fld_names_str in + of_sexp_error msg sexp + +let record_duplicate_fields loc rev_fld_names sexp = + record_superfluous_fields ~what:"duplicate fields" ~loc rev_fld_names sexp + +let record_extra_fields loc rev_fld_names sexp = + record_superfluous_fields ~what:"extra fields" ~loc rev_fld_names sexp + +let rec record_get_undefined_loop fields = function + | [] -> String.concat (List.rev fields) ~sep:" " + | (true, field) :: rest -> record_get_undefined_loop (field :: fields) rest + | _ :: rest -> record_get_undefined_loop fields rest + +let record_undefined_elements loc sexp lst = + let undefined = record_get_undefined_loop [] lst in + let msg = + sprintf "%s_of_sexp: the following record elements were undefined: %s" + loc undefined + in + of_sexp_error msg sexp + +let record_list_instead_atom loc sexp = + let msg = loc ^ "_of_sexp: list instead of atom for record expected" in + of_sexp_error msg sexp + +let record_poly_field_value loc sexp = + let msg = + loc ^ + "_of_sexp: cannot convert values of types resulting from polymorphic \ + record fields" + in + of_sexp_error msg sexp + + +(* Errors concerning polymorphic variants *) + +exception No_variant_match + +let no_variant_match () = + raise No_variant_match + +let no_matching_variant_found loc sexp = + of_sexp_error (loc ^ "_of_sexp: no matching variant found") sexp + +let ptag_no_args loc sexp = + of_sexp_error ( + loc ^ "_of_sexp: polymorphic variant does not take arguments") sexp + +let ptag_incorrect_n_args loc cnstr sexp = + let msg = + sprintf + "%s_of_sexp: polymorphic variant tag %S has incorrect number of arguments" + loc cnstr + in + of_sexp_error msg sexp + +let ptag_takes_args loc sexp = + of_sexp_error (loc ^ "_of_sexp: polymorphic variant tag takes an argument") + sexp + +let nested_list_invalid_poly_var loc sexp = + of_sexp_error ( + loc ^ "_of_sexp: a nested list is an invalid polymorphic variant") sexp + +let empty_list_invalid_poly_var loc sexp = + of_sexp_error ( + loc ^ "_of_sexp: the empty list is an invalid polymorphic variant") sexp + +let silly_type loc sexp = + of_sexp_error (loc ^ "_of_sexp: trying to convert a silly type") sexp + +let empty_type loc sexp = + of_sexp_error (loc ^ "_of_sexp: trying to convert an empty type") sexp