diff --git a/src/lib_stdlib_unix/key_value_store.ml b/src/lib_stdlib_unix/key_value_store.ml index 51c708b992b76ace74307c6f0db96f6bee82bdb7..ac9e8f475a3fe0c2ab74242c79c4642774e26847 100644 --- a/src/lib_stdlib_unix/key_value_store.ml +++ b/src/lib_stdlib_unix/key_value_store.ml @@ -521,9 +521,7 @@ end = struct let remove_with_opened_file files lru filepath opened_file = let open Lwt_syntax in let* () = close_opened_file opened_file in - (* It may happen that the node was already evicted by a concurrent - action. Hence [LRU.remove] can fail. *) - (try LRU.remove lru opened_file.lru_node with _ -> ()) ; + LRU.remove lru opened_file.lru_node ; Table.remove files filepath ; Lwt_unix.unlink filepath @@ -558,9 +556,13 @@ end = struct | None -> on_file_closed ~on_file_opened | Some opened_file -> on_file_opened opened_file - let close_file files last_actions filepath = + let close_file files lru last_actions filepath = let on_file_closed ~on_file_opened:_ = Lwt.return_unit in - let on_file_opened opened_file = close_opened_file opened_file in + let on_file_opened opened_file = + let open Lwt_syntax in + let+ () = close_opened_file opened_file in + LRU.remove lru opened_file.lru_node + in generic_action files last_actions filepath ~on_file_closed ~on_file_opened let read ~on_file_closed files last_actions layout key = @@ -625,7 +627,7 @@ end = struct generic_action files last_actions filepath ~on_file_closed ~on_file_opened end - let close_file files last_actions filepath = + let close_file files lru last_actions filepath = (* Since this function does not aim to be exposed, we do not check whether the store is closed. This would actually be a mistake since it is used while the store is closing. @@ -642,7 +644,7 @@ end = struct (* [p] is a promise that triggers the action of closing the file. It is important to not wait on it so that we can update the store's last_actions atomically to ensure invariant (A). *) - let p = Action.close_file files last_actions filepath in + let p = Action.close_file files lru last_actions filepath in Table.replace files filepath (Closing p) ; Table.replace last_actions filepath (Close p) ; @@ -675,7 +677,7 @@ end = struct closed := true ; let* () = Table.iter_s - (fun filename _ -> close_file files last_actions filename) + (fun filename _ -> close_file files lru last_actions filename) files in LRU.clear lru ; @@ -691,39 +693,25 @@ end = struct let add_lru files last_actions lru filename = let open Lwt_syntax in let rec loop () = - let lru_node, remove = LRU.add_and_return_erased lru filename in - match remove with - | None -> return lru_node - | Some filepath -> - (* The reason why we remove the node is because of a deadlock - that may occur in the PBT test, but has almost 0 chance to - occur in practice. - - A deadlock can arise if the call to [close_file] below - requires to evict the current node. How can this happen? - With an LRU of size 1, this can happen if the [close_file] - function must wait the end of the current actions on going - for this file and those actions require to open the very - same file. How? Well for example if the on going actions - are [Remove] followed by a [Write]. This is stupid, but - possible (so generated by the PBT) and has very few chances - to appear in practice. Now if the LRU is of size [n], this - has even less chance to occur (you would need to generate a - circle of dependencies). *) - LRU.remove lru lru_node ; - (* We want to ensure that the number of file descriptors opened - is bounded by the size of the LRU. This is why we wait first - for the eviction promise to be fulfilled that will close the - file evicted. *) - let* () = close_file files last_actions filepath in - (* We call recursively to be sure that when trying to add the - node again, no node will be evicted. This strategy may - create in theory some starvation since while doing this - other nodes may have been added to the KVS, and - consequently, we may have to wait closing more files. In - practice, it should not be an issue: The starvation can - happen only if the store is busy all the time. *) - loop () + if LRU.capacity lru = LRU.length lru then + let node = LRU.oldest_element lru |> Stdlib.Option.get in + let filepath = LRU.data node in + (* We want to ensure that the number of file descriptors opened + is bounded by the size of the LRU. This is why we wait first + for the eviction promise to be fulfilled that will close the + file evicted. *) + let* () = close_file files lru last_actions filepath in + (* We call recursively to be sure that when trying to add the + node again, no node will be evicted. This strategy may + create in theory some starvation since while doing this + other nodes may have been added to the KVS, and + consequently, we may have to wait closing more files. In + practice, it should not be an issue: The starvation can + happen only if the store is busy all the time. *) + (loop [@ocaml.tailcall]) () + else + let lru_node = LRU.add lru filename in + Lwt.return lru_node in loop () @@ -815,7 +803,16 @@ end = struct in let p = Action.read ~on_file_closed files last_actions layout key in Table.replace last_actions layout.filepath (Read p) ; - let+ _file, value = p in + let+ file, value = p in + (match file with + | None -> ( + match Table.find_opt last_actions layout.filepath with + | Some (Read p) -> ( + match Lwt.state p with + | Lwt.Return _ -> Table.remove last_actions layout.filepath + | _ -> ()) + | _ -> ()) + | Some _ -> ()) ; value (* Very similar to [read] action except we only look at the bitset @@ -839,7 +836,16 @@ end = struct Action.value_exists ~on_file_closed files last_actions layout key in Table.replace last_actions layout.filepath (Value_exists p) ; - let+ _, exists = p in + let+ file, exists = p in + (match file with + | None -> ( + match Table.find_opt last_actions layout.filepath with + | Some (Read p) -> ( + match Lwt.state p with + | Lwt.Return _ -> Table.remove last_actions layout.filepath + | _ -> ()) + | _ -> ()) + | Some _ -> ()) ; exists (* Very similar to [value_exists] action except we look at the @@ -861,7 +867,16 @@ end = struct in let p = Action.count_values ~on_file_closed files last_actions layout in Table.replace last_actions layout.filepath (Count_values p) ; - let+ _, count = p in + let+ file, count = p in + (match file with + | None -> ( + match Table.find_opt last_actions layout.filepath with + | Some (Count_values p) -> ( + match Lwt.state p with + | Lwt.Return _ -> Table.remove last_actions layout.filepath + | _ -> ()) + | _ -> ()) + | Some _ -> ()) ; count let write ?(override = false) {files; last_actions; lru; closed} layout key @@ -897,10 +912,7 @@ end = struct if !closed then Lwt.return (Error (Error_monad.TzTrace.make (Closed {action = "remove"}))) else - let on_file_closed ~on_file_opened:_ = - let+ () = may_remove_file layout.filepath in - Table.remove files layout.filepath - in + let on_file_closed ~on_file_opened:_ = may_remove_file layout.filepath in let p = Action.remove_file ~on_file_closed @@ -910,11 +922,13 @@ end = struct layout.filepath in Table.replace last_actions layout.filepath (Remove p) ; - Table.replace files layout.filepath (Closing p) ; + (match Table.find_opt files layout.filepath with + | None -> () + | Some _ -> Table.replace files layout.filepath (Closing p)) ; let* () = p in (* See [close_file] for an explanation of the lines below. *) (match Table.find_opt last_actions layout.filepath with - | Some (Close p) -> ( + | Some (Close p) | Some (Remove p) -> ( match Lwt.state p with | Lwt.Return _ -> Table.remove last_actions layout.filepath | _ -> ()) diff --git a/src/lib_stdlib_unix/test/test_key_value_store_fuzzy.ml b/src/lib_stdlib_unix/test/test_key_value_store_fuzzy.ml index 3178bb7b265c4ed0531589839e556ed0b6a31c51..5dfc7ca039ed1585d9ee93ffe0cc87d8963af197 100644 --- a/src/lib_stdlib_unix/test/test_key_value_store_fuzzy.ml +++ b/src/lib_stdlib_unix/test/test_key_value_store_fuzzy.ml @@ -141,6 +141,8 @@ module type S = sig module View : sig val opened_files : ('file, 'key, 'value) t -> int + + val ongoing_actions : ('file, 'key, 'value) t -> int end end @@ -191,6 +193,10 @@ module R : S = struct let opened_files table = table |> Stdlib.Hashtbl.to_seq_keys |> Seq.map fst |> List.of_seq |> List.sort_uniq compare |> List.length + + let ongoing_actions table = + table |> Stdlib.Hashtbl.to_seq_keys |> Seq.map fst |> List.of_seq + |> List.sort_uniq compare |> List.length end end @@ -560,7 +566,26 @@ let run_scenario (function Ok () -> return_unit | Error err -> fail err) promises_running_seq in - run_actions action next_actions Seq_s.empty + (* After waiting for all the promises to be executed, the + number of opened files in theory should not exceed the + number of files in the LRU. *) + if L.View.opened_files left <= lru_size then + if L.View.ongoing_actions left <= lru_size then + run_actions action next_actions Seq_s.empty + else + failwith + "Expected size of actions table to be at most %d. Got %d \ + (remaining actions: %d)." + lru_size + (L.View.ongoing_actions left) + (List.length next_actions + 1) + else + failwith + "Expected size of files table to be at most %d. Got %d (remaining \ + actions: %d)." + lru_size + (L.View.opened_files left) + (List.length next_actions + 1) | (Parallel, action) :: next_actions -> (* We do not wait for promises to end and append them to the list of promises on-going. *)