{-
(c) Bartosz Nitka, Facebook, 2015

UniqDFM: Specialised deterministic finite maps, for things with @[email protected]

Basically, the things need to be in class @[email protected], and we use the
@[email protected] method to grab their @[email protected]

This is very similar to @[email protected], the major difference being that the order of
folding is not dependent on @[email protected] ordering, giving determinism.
Currently the ordering is determined by insertion order.

See Note [Unique Determinism] in Unique for explanation why @[email protected] ordering
is not deterministic.
-}

{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleContexts #-}
{-# OPTIONS_GHC -Wall #-}

module UniqDFM (
        -- * Unique-keyed deterministic mappings
        UniqDFM,       -- abstract type

        -- ** Manipulating those mappings
        emptyUDFM,
        unitUDFM,
        addToUDFM,
        addToUDFM_C,
        addListToUDFM,
        delFromUDFM,
        delListFromUDFM,
        adjustUDFM,
        alterUDFM,
        mapUDFM,
        plusUDFM,
        plusUDFM_C,
        lookupUDFM, lookupUDFM_Directly,
        elemUDFM,
        foldUDFM,
        eltsUDFM,
        filterUDFM, filterUDFM_Directly,
        isNullUDFM,
        sizeUDFM,
        intersectUDFM, udfmIntersectUFM,
        intersectsUDFM,
        disjointUDFM, disjointUdfmUfm,
        minusUDFM,
        listToUDFM,
        udfmMinusUFM,
        partitionUDFM,
        anyUDFM, allUDFM,
        pprUDFM,

        udfmToList,
        udfmToUfm,
        nonDetFoldUDFM,
        alwaysUnsafeUfmToUdfm,
    ) where

import GhcPrelude

import Unique           ( Uniquable(..), Unique, getKey )
import Outputable

import qualified Data.IntMap as M
import Data.Data
import Data.List (sortBy)
import Data.Function (on)
import qualified Data.Semigroup as Semi
import UniqFM (UniqFM, listToUFM_Directly, nonDetUFMToList, ufmToIntMap)

-- Note [Deterministic UniqFM]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- A @[email protected] is just like @[email protected] with the following additional
-- property: the function `udfmToList` returns the elements in some
-- deterministic order not depending on the Unique key for those elements.
--
-- If the client of the map performs operations on the map in deterministic
-- order then `udfmToList` returns them in deterministic order.
--
-- There is an implementation cost: each element is given a serial number
-- as it is added, and `udfmToList` sorts it's result by this serial
-- number. So you should only use `UniqDFM` if you need the deterministic
-- property.
--
-- `foldUDFM` also preserves determinism.
--
-- Normal @[email protected] when you turn it into a list will use
-- Data.IntMap.toList function that returns the elements in the order of
-- the keys. The keys in @[email protected] are always @[email protected], so you end up with
-- with a list ordered by @[email protected]
-- The order of @[email protected] is known to be not stable across rebuilds.
-- See Note [Unique Determinism] in Unique.
--
--
-- There's more than one way to implement this. The implementation here tags
-- every value with the insertion time that can later be used to sort the
-- values when asked to convert to a list.
--
-- An alternative would be to have
--
--   data UniqDFM ele = UDFM (M.IntMap ele) [ele]
--
-- where the list determines the order. This makes deletion tricky as we'd
-- only accumulate elements in that list, but makes merging easier as you
-- can just merge both structures independently.
-- Deletion can probably be done in amortized fashion when the size of the
-- list is twice the size of the set.

-- | A type of values tagged with insertion time
data TaggedVal val =
  TaggedVal
    val
    {-# UNPACK #-} !Int -- ^ insertion time
  deriving Data

taggedFst :: TaggedVal val -> val
taggedFst (TaggedVal v _) = v

taggedSnd :: TaggedVal val -> Int
taggedSnd (TaggedVal _ i) = i

instance Eq val => Eq (TaggedVal val) where
  (TaggedVal v1 _) == (TaggedVal v2 _) = v1 == v2

instance Functor TaggedVal where
  fmap f (TaggedVal val i) = TaggedVal (f val) i

-- | Type of unique deterministic finite maps
data UniqDFM ele =
  UDFM
    !(M.IntMap (TaggedVal ele)) -- A map where keys are Unique's values and
                                -- values are tagged with insertion time.
                                -- The invariant is that all the tags will
                                -- be distinct within a single map
    {-# UNPACK #-} !Int         -- Upper bound on the values' insertion
                                -- time. See Note [Overflow on plusUDFM]
  deriving (Data, Functor)

emptyUDFM :: UniqDFM elt
emptyUDFM = UDFM M.empty 0

unitUDFM :: Uniquable key => key -> elt -> UniqDFM elt
unitUDFM k v = UDFM (M.singleton (getKey $ getUnique k) (TaggedVal v 0)) 1

addToUDFM :: Uniquable key => UniqDFM elt -> key -> elt  -> UniqDFM elt
addToUDFM m k v = addToUDFM_Directly m (getUnique k) v

addToUDFM_Directly :: UniqDFM elt -> Unique -> elt -> UniqDFM elt
addToUDFM_Directly (UDFM m i) u v
  = UDFM (M.insertWith tf (getKey u) (TaggedVal v i) m) (i + 1)
  where
    tf (TaggedVal new_v _) (TaggedVal _ old_i) = TaggedVal new_v old_i
      -- Keep the old tag, but insert the new value
      -- This means that udfmToList typically returns elements
      -- in the order of insertion, rather than the reverse

addToUDFM_Directly_C
  :: (elt -> elt -> elt)   -- old -> new -> result
  -> UniqDFM elt
  -> Unique -> elt
  -> UniqDFM elt
addToUDFM_Directly_C f (UDFM m i) u v
  = UDFM (M.insertWith tf (getKey u) (TaggedVal v i) m) (i + 1)
    where
      tf (TaggedVal new_v _) (TaggedVal old_v old_i)
         = TaggedVal (f old_v new_v) old_i
          -- Flip the arguments, because M.insertWith uses  (new->old->result)
          --                         but f            needs (old->new->result)
          -- Like addToUDFM_Directly, keep the old tag

addToUDFM_C
  :: Uniquable key => (elt -> elt -> elt) -- old -> new -> result
  -> UniqDFM elt -- old
  -> key -> elt -- new
  -> UniqDFM elt -- result
addToUDFM_C f m k v = addToUDFM_Directly_C f m (getUnique k) v

addListToUDFM :: Uniquable key => UniqDFM elt -> [(key,elt)] -> UniqDFM elt
addListToUDFM = foldl (\m (k, v) -> addToUDFM m k v)

addListToUDFM_Directly :: UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt
addListToUDFM_Directly = foldl (\m (k, v) -> addToUDFM_Directly m k v)

addListToUDFM_Directly_C
  :: (elt -> elt -> elt) -> UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt
addListToUDFM_Directly_C f = foldl (\m (k, v) -> addToUDFM_Directly_C f m k v)

delFromUDFM :: Uniquable key => UniqDFM elt -> key -> UniqDFM elt
delFromUDFM (UDFM m i) k = UDFM (M.delete (getKey $ getUnique k) m) i

plusUDFM_C :: (elt -> elt -> elt) -> UniqDFM elt -> UniqDFM elt -> UniqDFM elt
plusUDFM_C f udfml@(UDFM _ i) udfmr@(UDFM _ j)
  -- we will use the upper bound on the tag as a proxy for the set size,
  -- to insert the smaller one into the bigger one
  | i > j = insertUDFMIntoLeft_C f udfml udfmr
  | otherwise = insertUDFMIntoLeft_C f udfmr udfml

-- Note [Overflow on plusUDFM]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- There are multiple ways of implementing plusUDFM.
-- The main problem that needs to be solved is overlap on times of
-- insertion between different keys in two maps.
-- Consider:
--
-- A = fromList [(a, (x, 1))]
-- B = fromList [(b, (y, 1))]
--
-- If you merge them naively you end up with:
--
-- C = fromList [(a, (x, 1)), (b, (y, 1))]
--
-- Which loses information about ordering and brings us back into
-- non-deterministic world.
--
-- The solution I considered before would increment the tags on one of the
-- sets by the upper bound of the other set. The problem with this approach
-- is that you'll run out of tags for some merge patterns.
-- Say you start with A with upper bound 1, you merge A with A to get A' and
-- the upper bound becomes 2. You merge A' with A' and the upper bound
-- doubles again. After 64 merges you overflow.
-- This solution would have the same time complexity as plusUFM, namely O(n+m).
--
-- The solution I ended up with has time complexity of
-- O(m log m + m * min (n+m, W)) where m is the smaller set.
-- It simply inserts the elements of the smaller set into the larger
-- set in the order that they were inserted into the smaller set. That's
-- O(m log m) for extracting the elements from the smaller set in the
-- insertion order and O(m * min(n+m, W)) to insert them into the bigger
-- set.

plusUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
plusUDFM udfml@(UDFM _ i) udfmr@(UDFM _ j)
  -- we will use the upper bound on the tag as a proxy for the set size,
  -- to insert the smaller one into the bigger one
  | i > j = insertUDFMIntoLeft udfml udfmr
  | otherwise = insertUDFMIntoLeft udfmr udfml

insertUDFMIntoLeft :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
insertUDFMIntoLeft udfml udfmr = addListToUDFM_Directly udfml $ udfmToList udfmr

insertUDFMIntoLeft_C
  :: (elt -> elt -> elt) -> UniqDFM elt -> UniqDFM elt -> UniqDFM elt
insertUDFMIntoLeft_C f udfml udfmr =
  addListToUDFM_Directly_C f udfml $ udfmToList udfmr

lookupUDFM :: Uniquable key => UniqDFM elt -> key -> Maybe elt
lookupUDFM (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey $ getUnique k) m

lookupUDFM_Directly :: UniqDFM elt -> Unique -> Maybe elt
lookupUDFM_Directly (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey k) m

elemUDFM :: Uniquable key => key -> UniqDFM elt -> Bool
elemUDFM k (UDFM m _i) = M.member (getKey $ getUnique k) m

-- | Performs a deterministic fold over the UniqDFM.
-- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
foldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a
foldUDFM k z m = foldr k z (eltsUDFM m)

-- | Performs a nondeterministic fold over the UniqDFM.
-- It's O(n), same as the corresponding function on `UniqFM`.
-- If you use this please provide a justification why it doesn't introduce
-- nondeterminism.
nonDetFoldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a
nonDetFoldUDFM k z (UDFM m _i) = foldr k z $ map taggedFst $ M.elems m

eltsUDFM :: UniqDFM elt -> [elt]
eltsUDFM (UDFM m _i) =
  map taggedFst $ sortBy (compare `on` taggedSnd) $ M.elems m

filterUDFM :: (elt -> Bool) -> UniqDFM elt -> UniqDFM elt
filterUDFM p (UDFM m i) = UDFM (M.filter (\(TaggedVal v _) -> p v) m) i

filterUDFM_Directly :: (Unique -> elt -> Bool) -> UniqDFM elt -> UniqDFM elt
filterUDFM_Directly p (UDFM m i) = UDFM (M.filterWithKey p' m) i
  where
  p' k (TaggedVal v _) = p (getUnique k) v

-- | Converts `UniqDFM` to a list, with elements in deterministic order.
-- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
udfmToList :: UniqDFM elt -> [(Unique, elt)]
udfmToList (UDFM m _i) =
  [ (getUnique k, taggedFst v)
  | (k, v) <- sortBy (compare `on` (taggedSnd . snd)) $ M.toList m ]

isNullUDFM :: UniqDFM elt -> Bool
isNullUDFM (UDFM m _) = M.null m

sizeUDFM :: UniqDFM elt -> Int
sizeUDFM (UDFM m _i) = M.size m

intersectUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
intersectUDFM (UDFM x i) (UDFM y _j) = UDFM (M.intersection x y) i
  -- M.intersection is left biased, that means the result will only have
  -- a subset of elements from the left set, so `i` is a good upper bound.

udfmIntersectUFM :: UniqDFM elt1 -> UniqFM elt2 -> UniqDFM elt1
udfmIntersectUFM (UDFM x i) y = UDFM (M.intersection x (ufmToIntMap y)) i
  -- M.intersection is left biased, that means the result will only have
  -- a subset of elements from the left set, so `i` is a good upper bound.

intersectsUDFM :: UniqDFM elt -> UniqDFM elt -> Bool
intersectsUDFM x y = isNullUDFM (x `intersectUDFM` y)

disjointUDFM :: UniqDFM elt -> UniqDFM elt -> Bool
disjointUDFM (UDFM x _i) (UDFM y _j) = M.null (M.intersection x y)

disjointUdfmUfm :: UniqDFM elt -> UniqFM elt2 -> Bool
disjointUdfmUfm (UDFM x _i) y = M.null (M.intersection x (ufmToIntMap y))

minusUDFM :: UniqDFM elt1 -> UniqDFM elt2 -> UniqDFM elt1
minusUDFM (UDFM x i) (UDFM y _j) = UDFM (M.difference x y) i
  -- M.difference returns a subset of a left set, so `i` is a good upper
  -- bound.

udfmMinusUFM :: UniqDFM elt1 -> UniqFM elt2 -> UniqDFM elt1
udfmMinusUFM (UDFM x i) y = UDFM (M.difference x (ufmToIntMap y)) i
  -- M.difference returns a subset of a left set, so `i` is a good upper
  -- bound.

-- | Partition UniqDFM into two UniqDFMs according to the predicate
partitionUDFM :: (elt -> Bool) -> UniqDFM elt -> (UniqDFM elt, UniqDFM elt)
partitionUDFM p (UDFM m i) =
  case M.partition (p . taggedFst) m of
    (left, right) -> (UDFM left i, UDFM right i)

-- | Delete a list of elements from a UniqDFM
delListFromUDFM  :: Uniquable key => UniqDFM elt -> [key] -> UniqDFM elt
delListFromUDFM = foldl delFromUDFM

-- | This allows for lossy conversion from UniqDFM to UniqFM
udfmToUfm :: UniqDFM elt -> UniqFM elt
udfmToUfm (UDFM m _i) =
  listToUFM_Directly [(getUnique k, taggedFst tv) | (k, tv) <- M.toList m]

listToUDFM :: Uniquable key => [(key,elt)] -> UniqDFM elt
listToUDFM = foldl (\m (k, v) -> addToUDFM m k v) emptyUDFM

listToUDFM_Directly :: [(Unique, elt)] -> UniqDFM elt
listToUDFM_Directly = foldl (\m (u, v) -> addToUDFM_Directly m u v) emptyUDFM

-- | Apply a function to a particular element
adjustUDFM :: Uniquable key => (elt -> elt) -> UniqDFM elt -> key -> UniqDFM elt
adjustUDFM f (UDFM m i) k = UDFM (M.adjust (fmap f) (getKey $ getUnique k) m) i

-- | The expression (alterUDFM f k map) alters value x at k, or absence
-- thereof. alterUDFM can be used to insert, delete, or update a value in
-- UniqDFM. Use addToUDFM, delFromUDFM or adjustUDFM when possible, they are
-- more efficient.
alterUDFM
  :: Uniquable key
  => (Maybe elt -> Maybe elt)  -- How to adjust
  -> UniqDFM elt               -- old
  -> key                       -- new
  -> UniqDFM elt               -- result
alterUDFM f (UDFM m i) k =
  UDFM (M.alter alterf (getKey $ getUnique k) m) (i + 1)
  where
  alterf Nothing = inject $ f Nothing
  alterf (Just (TaggedVal v _)) = inject $ f (Just v)
  inject Nothing = Nothing
  inject (Just v) = Just $ TaggedVal v i

-- | Map a function over every value in a UniqDFM
mapUDFM :: (elt1 -> elt2) -> UniqDFM elt1 -> UniqDFM elt2
mapUDFM f (UDFM m i) = UDFM (M.map (fmap f) m) i

anyUDFM :: (elt -> Bool) -> UniqDFM elt -> Bool
anyUDFM p (UDFM m _i) = M.foldr ((||) . p . taggedFst) False m

allUDFM :: (elt -> Bool) -> UniqDFM elt -> Bool
allUDFM p (UDFM m _i) = M.foldr ((&&) . p . taggedFst) True m

instance Semi.Semigroup (UniqDFM a) where
  (<>) = plusUDFM

instance Monoid (UniqDFM a) where
  mempty = emptyUDFM
  mappend = (Semi.<>)

-- This should not be used in commited code, provided for convenience to
-- make ad-hoc conversions when developing
alwaysUnsafeUfmToUdfm :: UniqFM elt -> UniqDFM elt
alwaysUnsafeUfmToUdfm = listToUDFM_Directly . nonDetUFMToList

-- Output-ery

instance Outputable a => Outputable (UniqDFM a) where
    ppr ufm = pprUniqDFM ppr ufm

pprUniqDFM :: (a -> SDoc) -> UniqDFM a -> SDoc
pprUniqDFM ppr_elt ufm
  = brackets $ fsep $ punctuate comma $
    [ ppr uq <+> text ":->" <+> ppr_elt elt
    | (uq, elt) <- udfmToList ufm ]

pprUDFM :: UniqDFM a    -- ^ The things to be pretty printed
       -> ([a] -> SDoc) -- ^ The pretty printing function to use on the elements
       -> SDoc          -- ^ 'SDoc' where the things have been pretty
                        -- printed
pprUDFM ufm pp = pp (eltsUDFM ufm)