mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 11:15:31 -05:00
example using LazyGraph and Futures to search a path between two urls
This commit is contained in:
parent
69d75de295
commit
a5d9a39afd
3 changed files with 57 additions and 1 deletions
2
Makefile
2
Makefile
|
|
@ -4,7 +4,7 @@ IMPLEMENTATION_FILES = $(shell find -name '*.ml')
|
||||||
|
|
||||||
TARGETS_LIB = containers.cmxa containers.cma
|
TARGETS_LIB = containers.cmxa containers.cma
|
||||||
TARGET_THREAD_LIB = thread_containers.cmxa thread_containers.cma
|
TARGET_THREAD_LIB = thread_containers.cmxa thread_containers.cma
|
||||||
EXAMPLES = examples/mem_size.native examples/collatz.native
|
EXAMPLES = examples/mem_size.native examples/collatz.native examples/crawl.native
|
||||||
OPTIONS = -use-ocamlfind
|
OPTIONS = -use-ocamlfind
|
||||||
|
|
||||||
all: lib lib_thread
|
all: lib lib_thread
|
||||||
|
|
|
||||||
1
_tags
1
_tags
|
|
@ -1,2 +1,3 @@
|
||||||
<**/*future.*>: thread
|
<**/*future.*>: thread
|
||||||
|
<examples/crawl.*>: package(batteries), thread, package(unix)
|
||||||
<tests/*.native>: thread
|
<tests/*.native>: thread
|
||||||
|
|
|
||||||
55
examples/crawl.ml
Normal file
55
examples/crawl.ml
Normal file
|
|
@ -0,0 +1,55 @@
|
||||||
|
|
||||||
|
(** Crawl the web to find the shortest path between two URLs *)
|
||||||
|
|
||||||
|
open Batteries
|
||||||
|
|
||||||
|
(* Pool handed as [~pool] to [Future.spawn_process] by [get_and_parse], i.e.
   the pool on which the download commands of this example are run.
   NOTE(review): the unit of [timeout] and the exact meaning of [size] are
   defined by [Future.Pool.create]; presumably seconds and a number of
   workers -- confirm against the Future module. *)
let pool = Future.Pool.create ~timeout:15. ~size:15
|
||||||
|
|
||||||
|
(** [split_lines s] cuts [s] at every newline and returns its non-empty
    lines, in order.

    Empty lines are dropped on purpose: [String.nsplit] yields a trailing
    empty string whenever [s] ends with a newline, and the callers of this
    function treat every returned line as a URL to download. *)
let split_lines s =
  String.nsplit s ~by:"\n"
  |> List.filter (fun line -> line <> "")
|
||||||
|
|
||||||
|
(** [get_and_parse url] spawns, on [pool], a shell pipeline that fetches
    [url] with wget and filters the result with grep, and returns a future
    of the links found.

    The future resolves to the middle component of the process result (bound
    as [stdout] below), cut into lines by [split_lines]; grep -o prints one
    match per line. When the future succeeds, a "downloaded" message with the
    number of lines is printed as a side effect.

    [url] usually comes from a remote page and is therefore untrusted: it is
    passed through [Filename.quote] so that it cannot break out of the shell
    command. The dot of the html suffix is escaped in the grep pattern so
    that it only matches a literal dot. *)
let get_and_parse url =
  let cmd =
    Format.sprintf "wget -q %s -O - | grep -o 'http://[^ \"]*\\.html'"
      (Filename.quote url)
  in
  let content = Future.spawn_process ?stdin:None ~pool ~cmd in
  content
  |> Future.map (fun (_, stdout, _) -> stdout)
  |> Future.map split_lines
  |> Batteries.tap (fun lines ->
       (* report the download once the list of links is available *)
       Future.on_success lines (fun lines ->
         Format.printf "downloaded %s (%d urls)@." url (List.length lines)))
|
||||||
|
|
||||||
|
(** A web page as used by the graph below: its URL, paired with a future of
    the list of strings (links) extracted from it, as returned by
    [get_and_parse]. *)
type page = string * (string list Future.t)
|
||||||
|
|
||||||
|
(** The web graph. A vertex is a [page], i.e. a URL together with the future
    of the links it contains; vertex and edge labels are both [unit].

    Two vertices are the same when their URLs are equal, and hashing only
    looks at the URL. Expanding a vertex prints a "force" message, reads the
    links out of its future with [Future.get], and calls [get_and_parse] on
    every link, so one download per successor is started at that point. *)
let g : (page, unit, unit) LazyGraph.t =
  let same_url (u1, _) (u2, _) = u1 = u2 in
  let hash_url (u, _) = Hashtbl.hash u in
  let expand ((url, future) as vertex) =
    Format.printf "force %s@." url;
    (* the page must have been fetched and parsed to know its links *)
    let successors =
      List.map
        (fun target -> (), (target, get_and_parse target))
        (Future.get future)
    in
    LazyGraph.Node (vertex, (), Gen.of_list successors)
  in
  LazyGraph.make ~eq:same_url ~hash:hash_url expand
|
||||||
|
|
||||||
|
(* [pp_path fmt path] prints the edges of [path] on [fmt] with [List.print],
   separated by newlines; each edge is shown as the URL of its source, an
   arrow, then the URL of its destination. *)
let pp_path fmt path =
  let print_edge out ((src, _), (), (dst, _)) =
    String.print out src;
    String.print out " -> ";
    String.print out dst
  in
  List.print ~sep:"\n" print_edge fmt path
|
||||||
|
|
||||||
|
(* [main from into] looks for a path in [g] going from the url [from] to the
   url [into] using [LazyGraph.dijkstra], and prints either the path with its
   cost, or a message saying that none was found ([Not_found]). *)
let main from into =
  Format.printf "seek path from %s to %s@." from into;
  let on_explore (url, _) = Format.printf " explore %s...@." url in
  try
    (* the target is built before the source, which is the order in which
       the usual OCaml compilers evaluate the arguments of an application *)
    let target = (into, get_and_parse into) in
    let source = (from, get_and_parse from) in
    let cost, path = LazyGraph.dijkstra ~on_explore g source target in
    Printf.printf "found path (cost %f):\n%a\n" cost pp_path path
  with Not_found ->
    Format.printf "no path could be found@."
|
||||||
|
|
||||||
|
(* Entry point: the first two command-line arguments are the source and the
   target urls handed to [main]. With fewer than two arguments a usage line
   is printed instead; it ends with a newline and a flush so that it does
   not run into the shell prompt. [let ()] makes the compiler check that
   both branches have type unit. *)
let () =
  if Array.length Sys.argv < 3
  then Format.printf "usage: crawl url1 url2@."
  else main Sys.argv.(1) Sys.argv.(2)
|
||||||
Loading…
Add table
Reference in a new issue