ocaml-containers/0.19/Containers_string.Levenshtein.html
2016-08-22 10:17:49 +02:00

173 lines
No EOL
11 KiB
HTML

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<link rel="stylesheet" href="style.css" type="text/css">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<link rel="Start" href="index.html">
<link rel="previous" href="Containers_string.KMP.html">
<link rel="Up" href="Containers_string.html">
<link title="Index of types" rel=Appendix href="index_types.html">
<link title="Index of exceptions" rel=Appendix href="index_exceptions.html">
<link title="Index of values" rel=Appendix href="index_values.html">
<link title="Index of class methods" rel=Appendix href="index_methods.html">
<link title="Index of class types" rel=Appendix href="index_class_types.html">
<link title="Index of modules" rel=Appendix href="index_modules.html">
<link title="Index of module types" rel=Appendix href="index_module_types.html">
<link title="CCVector" rel="Chapter" href="CCVector.html">
<link title="CCPrint" rel="Chapter" href="CCPrint.html">
<link title="CCError" rel="Chapter" href="CCError.html">
<link title="CCHeap" rel="Chapter" href="CCHeap.html">
<link title="CCList" rel="Chapter" href="CCList.html">
<link title="CCOpt" rel="Chapter" href="CCOpt.html">
<link title="CCPair" rel="Chapter" href="CCPair.html">
<link title="CCFun" rel="Chapter" href="CCFun.html">
<link title="CCHash" rel="Chapter" href="CCHash.html">
<link title="CCInt" rel="Chapter" href="CCInt.html">
<link title="CCBool" rel="Chapter" href="CCBool.html">
<link title="CCFloat" rel="Chapter" href="CCFloat.html">
<link title="CCArray" rel="Chapter" href="CCArray.html">
<link title="CCRef" rel="Chapter" href="CCRef.html">
<link title="CCSet" rel="Chapter" href="CCSet.html">
<link title="CCOrd" rel="Chapter" href="CCOrd.html">
<link title="CCRandom" rel="Chapter" href="CCRandom.html">
<link title="CCString" rel="Chapter" href="CCString.html">
<link title="CCHashtbl" rel="Chapter" href="CCHashtbl.html">
<link title="CCMap" rel="Chapter" href="CCMap.html">
<link title="CCFormat" rel="Chapter" href="CCFormat.html">
<link title="CCIO" rel="Chapter" href="CCIO.html">
<link title="CCInt64" rel="Chapter" href="CCInt64.html">
<link title="CCChar" rel="Chapter" href="CCChar.html">
<link title="CCResult" rel="Chapter" href="CCResult.html">
<link title="Containers" rel="Chapter" href="Containers.html">
<link title="CCKTree" rel="Chapter" href="CCKTree.html">
<link title="CCKList" rel="Chapter" href="CCKList.html">
<link title="CCLazy_list" rel="Chapter" href="CCLazy_list.html">
<link title="CCMultiMap" rel="Chapter" href="CCMultiMap.html">
<link title="CCMultiSet" rel="Chapter" href="CCMultiSet.html">
<link title="CCTrie" rel="Chapter" href="CCTrie.html">
<link title="CCFlatHashtbl" rel="Chapter" href="CCFlatHashtbl.html">
<link title="CCCache" rel="Chapter" href="CCCache.html">
<link title="CCPersistentHashtbl" rel="Chapter" href="CCPersistentHashtbl.html">
<link title="CCDeque" rel="Chapter" href="CCDeque.html">
<link title="CCFQueue" rel="Chapter" href="CCFQueue.html">
<link title="CCBV" rel="Chapter" href="CCBV.html">
<link title="CCMixtbl" rel="Chapter" href="CCMixtbl.html">
<link title="CCMixmap" rel="Chapter" href="CCMixmap.html">
<link title="CCRingBuffer" rel="Chapter" href="CCRingBuffer.html">
<link title="CCIntMap" rel="Chapter" href="CCIntMap.html">
<link title="CCPersistentArray" rel="Chapter" href="CCPersistentArray.html">
<link title="CCMixset" rel="Chapter" href="CCMixset.html">
<link title="CCHashconsedSet" rel="Chapter" href="CCHashconsedSet.html">
<link title="CCGraph" rel="Chapter" href="CCGraph.html">
<link title="CCHashSet" rel="Chapter" href="CCHashSet.html">
<link title="CCBitField" rel="Chapter" href="CCBitField.html">
<link title="CCHashTrie" rel="Chapter" href="CCHashTrie.html">
<link title="CCBloom" rel="Chapter" href="CCBloom.html">
<link title="CCWBTree" rel="Chapter" href="CCWBTree.html">
<link title="CCRAL" rel="Chapter" href="CCRAL.html">
<link title="CCAllocCache" rel="Chapter" href="CCAllocCache.html">
<link title="CCImmutArray" rel="Chapter" href="CCImmutArray.html">
<link title="CCHet" rel="Chapter" href="CCHet.html">
<link title="Containers_string" rel="Chapter" href="Containers_string.html">
<link title="CCKMP" rel="Chapter" href="CCKMP.html">
<link title="CCLevenshtein" rel="Chapter" href="CCLevenshtein.html">
<link title="CCApp_parse" rel="Chapter" href="CCApp_parse.html">
<link title="CCParse" rel="Chapter" href="CCParse.html">
<link title="CCBigstring" rel="Chapter" href="CCBigstring.html">
<link title="CCArray1" rel="Chapter" href="CCArray1.html">
<link title="CCPool" rel="Chapter" href="CCPool.html">
<link title="CCLock" rel="Chapter" href="CCLock.html">
<link title="CCSemaphore" rel="Chapter" href="CCSemaphore.html">
<link title="CCThread" rel="Chapter" href="CCThread.html">
<link title="CCBlockingQueue" rel="Chapter" href="CCBlockingQueue.html">
<link title="CCTimer" rel="Chapter" href="CCTimer.html">
<link title="Containers_advanced" rel="Chapter" href="Containers_advanced.html">
<link title="CCLinq" rel="Chapter" href="CCLinq.html">
<link title="CCBatch" rel="Chapter" href="CCBatch.html">
<link title="CCCat" rel="Chapter" href="CCCat.html">
<link title="CCMonadIO" rel="Chapter" href="CCMonadIO.html">
<link title="Containers_io_is_deprecated" rel="Chapter" href="Containers_io_is_deprecated.html">
<link title="CCUnix" rel="Chapter" href="CCUnix.html">
<link title="CCSexp" rel="Chapter" href="CCSexp.html">
<link title="CCSexpM" rel="Chapter" href="CCSexpM.html"><link title="Abstraction over Strings" rel="Section" href="#2_AbstractionoverStrings">
<link title="Continuation list" rel="Section" href="#2_Continuationlist">
<link title="Signature" rel="Section" href="#2_Signature">
<link title="Functor" rel="Section" href="#2_Functor">
<link title="Default instance: string" rel="Section" href="#2_Defaultinstancestring">
<title>Containers doc : Containers_string.Levenshtein</title>
</head>
<body>
<div class="navbar"><a class="pre" href="Containers_string.KMP.html" title="Containers_string.KMP">Previous</a>
&nbsp;<a class="up" href="Containers_string.html" title="Containers_string">Up</a>
&nbsp;</div>
<h1>Module <a href="type_Containers_string.Levenshtein.html">Containers_string.Levenshtein</a></h1>
<pre><span class="keyword">module</span> Levenshtein: <code class="type"><a href="CCLevenshtein.html">CCLevenshtein</a></code></pre><hr width="100%">
<pre><span id="TYPEsequence"><span class="keyword">type</span> <code class="type">'a</code> sequence</span> = <code class="type">('a -> unit) -> unit</code> </pre>
<pre><span id="TYPEgen"><span class="keyword">type</span> <code class="type">'a</code> gen</span> = <code class="type">unit -> 'a option</code> </pre>
<br>
<h2 id="2_AbstractionoverStrings">Abstraction over Strings</h2>
Due to the existence of several encodings and string representations, we
abstract over the type of strings. A string is a finite array of characters
(8-bit chars, Unicode runes, etc.) which provides a length operation
and a function to access the n-th character.<br>
<pre><span class="keyword">module type</span> <a href="CCLevenshtein.STRING.html">STRING</a> = <code class="code"><span class="keyword">sig</span></code> <a href="CCLevenshtein.STRING.html">..</a> <code class="code"><span class="keyword">end</span></code></pre><br>
<h2 id="2_Continuationlist">Continuation list</h2>
<p>
This data structure is used to represent a list of results that is
evaluated only as far as the user wants. If the user only wants a few elements,
she doesn't pay for the remaining ones.
<p>
In particular, when matching a string against a (big) set of indexed
strings, we return a continuation list so that, even if there are many results,
only those actually asked for are evaluated.<br>
<pre><span id="TYPEklist"><span class="keyword">type</span> <code class="type">'a</code> klist</span> = <code class="type">unit -> [ `Cons of 'a * 'a <a href="CCLevenshtein.html#TYPEklist">klist</a> | `Nil ]</code> </pre>
<pre><span id="VALklist_to_list"><span class="keyword">val</span> klist_to_list</span> : <code class="type">'a <a href="CCLevenshtein.html#TYPEklist">klist</a> -> 'a list</code></pre><div class="info ">
Helper for short lists.<br>
</div>
<br>
<h2 id="2_Signature">Signature</h2>
<p>
The signature for a given string representation provides 3 main things:
<p>
<ul>
<li>an <code class="code">edit_distance</code> function to compute the edit distance between strings</li>
<li>an <code class="code">automaton</code> type that is built from a string <code class="code">s</code> and a maximum distance <code class="code">n</code>,
and only accepts the strings <code class="code">s'</code> such that <code class="code">edit_distance s s' &lt;= n</code>.</li>
<li>an <code class="code"><span class="constructor">Index</span></code> module that can be used to map many strings to values, like
a regular string map, but for which retrieval is fuzzy (for a given
maximal distance).</li>
</ul>
A possible use of the index could be:
<pre class="codepre"><code class="code"><span class="keyword">let</span>&nbsp;words&nbsp;=&nbsp;<span class="constructor">CCIO</span>.with_in&nbsp;<span class="string">"/usr/share/dict/words"</span><br>
&nbsp;&nbsp;(<span class="keyword">fun</span>&nbsp;i&nbsp;<span class="keywordsign">-&gt;</span>&nbsp;<span class="constructor">CCIO</span>.read_all&nbsp;i&nbsp;|&gt;&nbsp;<span class="constructor">CCString</span>.<span class="constructor">Split</span>.list_cpy&nbsp;~by:<span class="string">"\n"</span>);;<br>
<br>
<span class="keyword">let</span>&nbsp;words&nbsp;=&nbsp;<span class="constructor">List</span>.map&nbsp;(<span class="keyword">fun</span>&nbsp;s<span class="keywordsign">-&gt;</span>s,s)&nbsp;words;;<br>
<span class="keyword">let</span>&nbsp;idx&nbsp;=&nbsp;<span class="constructor">CCLevenshtein</span>.<span class="constructor">Index</span>.of_list&nbsp;words;;<br>
<br>
<span class="constructor">CCLevenshtein</span>.<span class="constructor">Index</span>.retrieve&nbsp;~limit:1&nbsp;idx&nbsp;<span class="string">"hell"</span>&nbsp;|&gt;&nbsp;<span class="constructor">CCLevenshtein</span>.klist_to_list;;<br>
</code></pre><br>
<pre><span class="keyword">module type</span> <a href="CCLevenshtein.S.html">S</a> = <code class="code"><span class="keyword">sig</span></code> <a href="CCLevenshtein.S.html">..</a> <code class="code"><span class="keyword">end</span></code></pre><br>
<h2 id="2_Functor">Functor</h2><br>
<pre><span class="keyword">module</span> <a href="CCLevenshtein.Make.html">Make</a> <code class="code">(</code><code class="code"><span class="constructor">Str</span></code><code class="code"> : </code><code class="type"><a href="CCLevenshtein.STRING.html">STRING</a></code><code class="code">) </code>: <code class="type"><a href="CCLevenshtein.S.html">S</a></code><code class="type">
with type string_ = Str.t
and type char_ = Str.char_</code></pre><br>
<h2 id="2_Defaultinstancestring">Default instance: string</h2><br>
<pre><span class="keyword">include</span> <a href="CCLevenshtein.S.html">CCLevenshtein.S</a></pre>
<pre><span id="VALdebug_print"><span class="keyword">val</span> debug_print</span> : <code class="type">Pervasives.out_channel -> automaton -> unit</code></pre></body></html>