4

So, I just ported the Trie from OCaml. Unfortunately, it runs slower than the standard Map in terms of tryFind. I don't understand this - the trie seems like it should be faster. Is F#'s code libraries built in some special way as to make them faster than the code that the user typically deploys?

Here's the code -

[<RequireQualifiedAccess>]
module Trie

type Node<'k, 'v when 'k : comparison> =
    { TrieMap : Map<'k, Node<'k, 'v>>
      TrieKvp : ('k list * 'v) option }
    member inline x.IsEmpty = x.TrieKvp.IsNone && x.TrieMap.IsEmpty

let inline make map kvp =
    { TrieMap = map
      TrieKvp = kvp }

let inline makeEmpty () : Node<'k, 'v> = make Map.empty None

let inline isEmpty (node : Node<'k, 'v>) = node.IsEmpty

let rec tryFind (key : 'k list) node =
    if key.IsEmpty then
        match node.TrieKvp with
        | Some (_, value) -> Some value
        | None -> None
    else
        let keyHead = key.Head
        let keyTail = key.Tail
        let optSubNode = Map.tryFind keyHead node.TrieMap
        match optSubNode with
        | Some subNode -> tryFind keyTail subNode
        | None -> None

let inline containsKey key node =
    (tryFind key node).IsSome

let rec addInternal (key : 'k list) value node =
    if key.IsEmpty then make node.TrieMap (Some (key, value))
    else
        let keyHead = key.Head
        let keyTail = key.Tail
        let newTrie =
            match Map.tryFind keyHead node.TrieMap with
            | Some subTrie -> subTrie
            | None -> makeEmpty ()
        let newTrie2 = addInternal keyTail value newTrie
        make (Map.add keyHead newTrie2 node.TrieMap) node.TrieKvp

let inline add key value node =
    addInternal key value node

let rec addMany kvps node =
    if Seq.isEmpty kvps then node
    else
        let kvpHead = Seq.head kvps
        let kvpTail = Seq.skip 1 kvps
        let newTrie = add (fst kvpHead) (snd kvpHead) node
        addMany kvpTail newTrie

let inline ofList kvps =
    addMany kvps (makeEmpty ())

let inline ofListBy by kvps =
    let pairs = List.map by kvps
    ofList pairs

let rec foldInternal folder rev node state =
    match node.TrieKvp with
    | Some (_, value) -> folder (Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap) (List.rev rev) value
    | None -> Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap

let inline fold folder state node =
    foldInternal folder [] node state

let rec map (mapper : 'k list -> 'v -> 'a) (node : Node<'k, 'v>) : Node<'k, 'a> =
    match node.TrieKvp with
    | Some (key, value) -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) (Some (key, mapper key value))
    | None -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) None

let inline toValueList node =
    fold (fun state _ value -> value :: state) [] node

let inline singleton (key, value) =
    add key value (makeEmpty ())

Here's a performance test that Jon Harrop provided that I find adequate for measuring improvements -

let xs = Array.init 1000000 (fun i -> [i])
let timer = System.Diagnostics.Stopwatch.StartNew()
let mutable t = Trie.makeEmpty()
for i=0 to xs.Length-1 do
    t <- Trie.add xs.[i] xs.[i] t
printfn "Trie took %fs to build" timer.Elapsed.TotalSeconds
timer.Restart()
for _ in 1..100 do
    for i=0 to xs.Length-1 do
        ignore(Trie.tryFind xs.[i])
printfn "Trie took %fs to search" timer.Elapsed.TotalSeconds

let timer = System.Diagnostics.Stopwatch.StartNew()
let mutable t = Map.empty
for i=0 to xs.Length-1 do
    t <- Map.add xs.[i] xs.[i] t
printfn "Map took %fs to build" timer.Elapsed.TotalSeconds
timer.Restart()
for _ in 1..100 do
    for i=0 to xs.Length-1 do
        ignore(Map.tryFind xs.[i])
printfn "Map took %fs to search" timer.Elapsed.TotalSeconds

NOTE: if you have a faster lookup data structure in mind, please note that I need a persistent data structure.

Guy Coder
  • 24,501
  • 8
  • 71
  • 136
Bryan Edds
  • 1,696
  • 12
  • 28
  • 2
    Can you provide a test showing that your `Trie` is slower than `Map`? – pad Jul 13 '12 at 13:55
  • If you're willing to trade space for lookup speed you could store the keys in sparse arrays. – Daniel Jul 13 '12 at 14:27
  • Danial - I believe using sparse arrays will either trade away persistence or performance (depending on how its used). – Bryan Edds Jul 13 '12 at 14:52
  • @BryanEdds: Have you tried using `list` instead of `Map`? – Daniel Jul 13 '12 at 14:58
  • Daniel - Nope - I could definitely try that though. – Bryan Edds Jul 13 '12 at 15:00
  • 4
    I don't see any tests showing how you are measuring performance. I feel like most people who ask performance questions have no clue what they are doing, how to test, how to use a variety of test data, how to measure, how to apply statistics. Prove yourself otherwise if you want a response. – Brian Jul 13 '12 at 16:41
  • I've already measured and profiled - using it slows my program down about 2x. Once one profiles, one then must proceed with an analysis. It would seem obvious that profilers only show the speed of portions of a program, not why each portion is actually slow. A priori analysis is, at some point, required. This is what I'm requesting here. However, I've been working on an isolated test, but am currently at work so haven't had time to paste it here. At any rate, feel free not to help. I have nothing to prove to you, and your personal grudges are your problem, not mine. – Bryan Edds Jul 13 '12 at 17:03
  • 2
    @BryanEdds: Brian's probably going off the fact that you asked a question about performance, which is subjectively defined, but didn't state how you expect it to be measured/improved. In that form, it's only a guessing game and de facto unanswerable. You could start by showing code that demonstrates the difference. – Daniel Jul 13 '12 at 17:12
  • It does depend on how you look at it, I suppose. Fortunately there have already been a lot of productive disussion by toyvo (see below), and once I understand the problem, I'll post the solution here. This thread will evolve as I build the things as people request them. But it takes time, so requires patience I'm afraid. Either way, I can't see any good reason for Brian's behavior. Flaming people on stack overflow is just silly. – Bryan Edds Jul 13 '12 at 17:22
  • 1
    He's answered a lot of questions and probably seen a lot of duds. That could make a person crotchety. Just imagine he was smiling when he typed that. – Daniel Jul 13 '12 at 17:33
  • 1
    Ya, you're probably right. I'm pretty crotchety right now myself. Anywho, I'll keep working on making this question better so it can be more answerable. Thanks! – Bryan Edds Jul 13 '12 at 17:42
  • So, I've found Jon's measurement code to be adequate, thus I've pasted it into the question. Improving this measurement improves the data structure according to this question. – Bryan Edds Jul 16 '12 at 13:19

3 Answers3

4

Unfortunately, it runs slower than the standard Map in terms of tryFind. I don't understand this - the trie seems like it should be faster.

A quick benchmark here suggests that your trie is already faster than Map for at least simple case:

do
    let n = 0
    let xs = Array.init 1000000 (fun i -> [i])
    let timer = System.Diagnostics.Stopwatch.StartNew()
    let mutable t = Trie.makeEmpty()
    for i=0 to xs.Length-1 do
        t <- Trie.add xs.[i] xs.[i] t
    printfn "Trie took %fs to build" timer.Elapsed.TotalSeconds
    timer.Restart()
    for _ in 1..100 do
        for i=0 to xs.Length-1 do
            ignore(Trie.tryFind xs.[i])
    printfn "Trie took %fs to search" timer.Elapsed.TotalSeconds

    let timer = System.Diagnostics.Stopwatch.StartNew()
    let mutable t = Map.empty
    for i=0 to xs.Length-1 do
        t <- Map.add xs.[i] xs.[i] t
    printfn "Map took %fs to build" timer.Elapsed.TotalSeconds
    timer.Restart()
    for _ in 1..100 do
        for i=0 to xs.Length-1 do
            ignore(Map.tryFind xs.[i])
    printfn "Map took %fs to search" timer.Elapsed.TotalSeconds

I get 4s to build your Trie, 8.7s to build a Map and about 0.7 to search in both cases.

However, there is a lot of room for improvement in your implementation. I recently wrote an article about an optimized generic persistent hash trie implementation in F# that was published here.

Your later comments imply that you only want to use this to map over strings. If so, it would be vastly more efficient to specialize your trie for string keys.

EDIT

KVB suggested that I elaborate on the "room for improvement" so here's some feedback:

  • Use inline sparingly as an optimization and only on the basis of compelling performance measurements.
  • Make empty a value rather than a function.
  • Avoid List.head and List.tail whenever possible. Use pattern matching instead.
  • Avoid genericity when possible (e.g. hard-code for string keys in this case).
gen_Eric
  • 223,194
  • 41
  • 299
  • 337
J D
  • 48,105
  • 13
  • 171
  • 274
  • 10
    Rather than just linking to a blurb about a not-freely-available article, perhaps you could also include some specific thoughts on the "room for improvement" you see? – kvb Jul 14 '12 at 17:47
  • I was hoping you'd pop in Jon! I have yet to get a subscsription to your site, but if the optimized generic hash impl you made is also persistent, I'd definitely sign up just to see it. Of course, that doesn't really help other SO members, so feel free to elaborate here if you like :) – Bryan Edds Jul 15 '12 at 01:01
  • 1
    @BryanEdds I'll happily elaborate on the room for improvement but you'll have to forgive me for keeping the quality content behind a paywall. Kids to feed and all that. ;-) – J D Jul 15 '12 at 10:38
  • I have tried to make it a value, but I keep getting the value restriction - ``let empty = make Map.empty None``. I tried adding an annotation, but to know avail. Do you know how to make this compile? – Bryan Edds Jul 16 '12 at 12:14
  • 10
    Please stop posting links to your own articles that are behind a paywall. Is the links **absolutely** necessary for the answer? – ChrisF Jul 19 '12 at 13:20
  • 2
    Answers on Stack Overflow are expected to stand on their own. The part about "room for improvement" does not add to the answer because it requires visiting an external link. Paid or free content is irrelevant. – JimmyPena Jul 19 '12 at 15:27
  • @ChrisF "Please stop posting links to your own articles that are behind a paywall. Is the links absolutely necessary for the answer?" The link is to an article I recently wrote on this exact topic (optimizing tries in F) that goes into more detail than I have ever seen elsewhere. – J D Jul 19 '12 at 21:38
  • 4
    @JonHarrop That may be the case, but as it's behind a paywall it's coming across as self promotion which (as you should know) is not welcome. – ChrisF Jul 19 '12 at 21:41
  • @ChrisF "not welcome". According to a meta QA I was referred to about this, self promotion is not frowned upon on SO as long you make your affiliation clear (which I did). – J D Jul 19 '12 at 21:53
  • 1
    @ChrisF "I accept that there's probably not a "huge" percentage at the moment, but it could get that way". True. I was horrified to learn that 50% of my top-voted answers do not yet have links to our sites. – J D Jul 19 '12 at 22:20
4

Alright, so after a little more thinking, I hypothesized that the real difference in performance is in the use of lists for keys as opposed to strings. Strings (and array) have much better cache coherency. So, I changed the key from a 'k list to a string and voila! Performance is now actually better than the Map in my application!

Here's the code -

[<RequireQualifiedAccess>]
module StringTrie

type Node<'v> =
    { TrieMap : Map<char, Node<'v>>
      TrieKvp : (string * 'v) option }
    member inline x.IsEmpty = x.TrieKvp.IsNone && x.TrieMap.IsEmpty

let inline make map kvp =
    { TrieMap = map
      TrieKvp = kvp }

let inline makeEmpty () : Node<'v> = make Map.empty None

let inline isEmpty (node : Node<'v>) = node.IsEmpty

let rec tryFindInternal (key : string) index node =
    if key.Length = index then
        match node.TrieKvp with
        | Some (_, value) -> Some value
        | None -> None
    else
        let optSubNode = Map.tryFind key.[index] node.TrieMap
        match optSubNode with
        | Some subNode -> tryFindInternal key (index + 1) subNode
        | None -> None

let inline tryFind (key : string) node =
    tryFindInternal key 0 node

let inline containsKey key node =
    (tryFind key node).IsSome

let rec addInternal (key : string) index value node =
    if key.Length = index then make node.TrieMap (Some (key, value))
    else
        let char = key.[index]
        let newTrie =
            match Map.tryFind char node.TrieMap with
            | Some subTrie -> subTrie
            | None -> makeEmpty ()
        let newTrie2 = addInternal key (index + 1) value newTrie
        make (Map.add char newTrie2 node.TrieMap) node.TrieKvp

let inline add key value node =
    addInternal key 0 value node

let rec addMany kvps node =
    if Seq.isEmpty kvps then node
    else
        let kvpHead = Seq.head kvps
        let kvpTail = Seq.skip 1 kvps
        let newTrie = add (fst kvpHead) (snd kvpHead) node
        addMany kvpTail newTrie

let inline ofList kvps =
    addMany kvps (makeEmpty ())

let inline ofListBy by kvps =
    let pairs = List.map by kvps
    ofList pairs

let rec foldInternal folder rev node state =
    match node.TrieKvp with
    | Some (_, value) -> folder (Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap) (List.rev rev) value
    | None -> Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap

let inline fold folder state node =
    foldInternal folder [] node state

let rec map (mapper : string -> 'v -> 'a) (node : Node<'v>) : Node<'a> =
    match node.TrieKvp with
    | Some (key, value) -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) (Some (key, mapper key value))
    | None -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) None

let inline toValueList node =
    fold (fun state _ value -> value :: state) [] node

let inline singleton (key, value) =
    add key value (makeEmpty ())

I also built a version that works for arrays in general and is also fast -

[<RequireQualifiedAccess>]
module ArrayTrie

type Node<'k, 'v when 'k : comparison> =
    { TrieMap : Map<'k, Node<'k, 'v>>
      TrieKvp : ('k array * 'v) option }
    member inline x.IsEmpty = x.TrieKvp.IsNone && x.TrieMap.IsEmpty

let inline make map kvp =
    { TrieMap = map
      TrieKvp = kvp }

let inline makeEmpty () : Node<'k, 'v> = make Map.empty None

let inline isEmpty (node : Node<'k, 'v>) = node.IsEmpty

let rec tryFindInternal (key : 'k array) index node =
    if key.Length = index then
        match node.TrieKvp with
        | Some (_, value) -> Some value
        | None -> None
    else
        let optSubNode = Map.tryFind key.[index] node.TrieMap
        match optSubNode with
        | Some subNode -> tryFindInternal key (index + 1) subNode
        | None -> None

let inline tryFind (key : 'k array) node =
    tryFindInternal key 0 node

let inline containsKey key node =
    (tryFind key node).IsSome

let rec addInternal (key : 'k array) index value node =
    if key.Length = index then make node.TrieMap (Some (key, value))
    else
        let char = key.[index]
        let newTrie =
            match Map.tryFind char node.TrieMap with
            | Some subTrie -> subTrie
            | None -> makeEmpty ()
        let newTrie2 = addInternal key (index + 1) value newTrie
        make (Map.add char newTrie2 node.TrieMap) node.TrieKvp

let inline add key value node =
    addInternal key 0 value node

let rec addMany kvps node =
    if Seq.isEmpty kvps then node
    else
        let kvpHead = Seq.head kvps
        let kvpTail = Seq.skip 1 kvps
        let newTrie = add (fst kvpHead) (snd kvpHead) node
        addMany kvpTail newTrie

let inline ofList kvps =
    addMany kvps (makeEmpty ())

let inline ofListBy by kvps =
    let pairs = List.map by kvps
    ofList pairs

let rec foldInternal folder rev node state =
    match node.TrieKvp with
    | Some (_, value) -> folder (Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap) (List.rev rev) value
    | None -> Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap

let inline fold folder state node =
    foldInternal folder [] node state

let rec map (mapper : 'k array -> 'v -> 'a) (node : Node<'k, 'v>) : Node<'k, 'a> =
    match node.TrieKvp with
    | Some (key, value) -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) (Some (key, mapper key value))
    | None -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) None

let inline toValueList node =
    fold (fun state _ value -> value :: state) [] node

let inline singleton (key, value) =
    add key value (makeEmpty ())

The only thing left that seem like it would improve performance is to get an internal pointer to the string and inc that rather than doing indexes over and over. This doesn't seem easy in F#, but seems at least possible for arrays in C#.

Bryan Edds
  • 1,696
  • 12
  • 28
2

Why wouldn't it be? How about OCaml, is it any faster there? Since your Trie is implemented in terms of Map I would expect it to be slower than Map for at least some inputs. It can still perhaps outperform Map in some cases, for example when the size is very large.

Also, if your primary interest is lookup performance, why not freeze your Trie to use Dictionary-based nodes?

t0yv0
  • 4,714
  • 19
  • 36
  • It's hard to imagine inputs for which his trie is faster. The RB tree is balanced, making it _O(log n)_. Seems like his trie would be _O(m log n)_ where _m_ is the length of the key. – Daniel Jul 13 '12 at 14:30
  • @Daniel, the `n` is different in these two cases since trie nodes hold maps of a much smaller size than the total number of elements `N`. I think then trie complexity should be more like *O(m log N / h)* with `h` denoting the height of the trie. So it would be faster on shorter-than-average keys. – t0yv0 Jul 13 '12 at 14:44
  • toyvo - A trie should be faster where I use it, such as for string keys. The standard map has O (m / 2 * log n) performance where m is the length of the string key. This is because it uses String.compare for each comparison, meaning that it has do to a redundant comparison of the beginnings of each key that would be known to equal (think about it). This trie, on the other hand, will have O (log (m * n)) time as each compare doesn't do redundant comparisons (this is the reason to use a trie, after all). Since the map I use internally is a map of chars, _not strings_, its use may be misleading. – Bryan Edds Jul 13 '12 at 14:50
  • Also, I think it would be impossible to know when to freeze a dictionary in this type of application (the lookup is for symbols in an functional language's environment). – Bryan Edds Jul 13 '12 at 15:02
  • NOTE: I'm sorry, I think my big-o description of the trie performance is wrong. Still, I think it should be better than Map's O (m / 2 * log n)... I dunno. I've gotten confused now... Maybe it's roughly O (m * log 18) for my application (string name characters are usually alpha-numeric). – Bryan Edds Jul 13 '12 at 15:11
  • @BryanEdds, thanks for correcting, very appealing argument. Back to complexity: so map does 1 run of `log N` comparisons each costing `m/2`, so `O(m log N / 2)`. In contrast, trie does `m` runs of `log n` comparisons each costing `1`, that is `O(m log n)`. For trie of height `h` we have `N = n ** h`. Then, trie does `O(m log N / h)`. Sounds better. – t0yv0 Jul 13 '12 at 15:15
  • I think the trie's complexity is roughly O (m * log 18) for my application (string name characters are usually alpha-numeric, giving us 36 / 2 = 18). So if that is true, I should definitely be faster than Map of string to value. – Bryan Edds Jul 13 '12 at 15:17
  • Concerning freezing dictionaries - right, if your application keeps updating the map, hard to know when to do that. Persistence with dictionaries means `O(N)` copy-on-write. On the other hand you could blow some memory and automatically cache map lookup with a dict. Could help with read-dominated performance loads. – t0yv0 Jul 13 '12 at 15:20