17 Oct, 2018

1 commit


13 Jun, 2018

2 commits

  • The vzalloc() function has no 2-factor argument form, so multiplication
    factors need to be wrapped in array_size(). This patch replaces cases of:

    vzalloc(a * b)

    with:
    vzalloc(array_size(a, b))

    as well as handling cases of:

    vzalloc(a * b * c)

    with:

    vzalloc(array3_size(a, b, c))

    This does, however, attempt to ignore constant size factors like:

    vzalloc(4 * 1024)

    though any constants defined via macros get caught up in the conversion.

    Any factors with a sizeof() of "unsigned char", "char", and "u8" were
    dropped, since they're redundant.

    The Coccinelle script used for this was:

    // Fix redundant parens around sizeof().
    @@
    type TYPE;
    expression THING, E;
    @@

    (
    vzalloc(
    - (sizeof(TYPE)) * E
    + sizeof(TYPE) * E
    , ...)
    |
    vzalloc(
    - (sizeof(THING)) * E
    + sizeof(THING) * E
    , ...)
    )

    // Drop single-byte sizes and redundant parens.
    @@
    expression COUNT;
    typedef u8;
    typedef __u8;
    @@

    (
    vzalloc(
    - sizeof(u8) * (COUNT)
    + COUNT
    , ...)
    |
    vzalloc(
    - sizeof(__u8) * (COUNT)
    + COUNT
    , ...)
    |
    vzalloc(
    - sizeof(char) * (COUNT)
    + COUNT
    , ...)
    |
    vzalloc(
    - sizeof(unsigned char) * (COUNT)
    + COUNT
    , ...)
    |
    vzalloc(
    - sizeof(u8) * COUNT
    + COUNT
    , ...)
    |
    vzalloc(
    - sizeof(__u8) * COUNT
    + COUNT
    , ...)
    |
    vzalloc(
    - sizeof(char) * COUNT
    + COUNT
    , ...)
    |
    vzalloc(
    - sizeof(unsigned char) * COUNT
    + COUNT
    , ...)
    )

    // 2-factor product with sizeof(type/expression) and identifier or constant.
    @@
    type TYPE;
    expression THING;
    identifier COUNT_ID;
    constant COUNT_CONST;
    @@

    (
    vzalloc(
    - sizeof(TYPE) * (COUNT_ID)
    + array_size(COUNT_ID, sizeof(TYPE))
    , ...)
    |
    vzalloc(
    - sizeof(TYPE) * COUNT_ID
    + array_size(COUNT_ID, sizeof(TYPE))
    , ...)
    |
    vzalloc(
    - sizeof(TYPE) * (COUNT_CONST)
    + array_size(COUNT_CONST, sizeof(TYPE))
    , ...)
    |
    vzalloc(
    - sizeof(TYPE) * COUNT_CONST
    + array_size(COUNT_CONST, sizeof(TYPE))
    , ...)
    |
    vzalloc(
    - sizeof(THING) * (COUNT_ID)
    + array_size(COUNT_ID, sizeof(THING))
    , ...)
    |
    vzalloc(
    - sizeof(THING) * COUNT_ID
    + array_size(COUNT_ID, sizeof(THING))
    , ...)
    |
    vzalloc(
    - sizeof(THING) * (COUNT_CONST)
    + array_size(COUNT_CONST, sizeof(THING))
    , ...)
    |
    vzalloc(
    - sizeof(THING) * COUNT_CONST
    + array_size(COUNT_CONST, sizeof(THING))
    , ...)
    )

    // 2-factor product, only identifiers.
    @@
    identifier SIZE, COUNT;
    @@

    vzalloc(
    - SIZE * COUNT
    + array_size(COUNT, SIZE)
    , ...)

    // 3-factor product with 1 sizeof(type) or sizeof(expression), with
    // redundant parens removed.
    @@
    expression THING;
    identifier STRIDE, COUNT;
    type TYPE;
    @@

    (
    vzalloc(
    - sizeof(TYPE) * (COUNT) * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    vzalloc(
    - sizeof(TYPE) * (COUNT) * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    vzalloc(
    - sizeof(TYPE) * COUNT * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    vzalloc(
    - sizeof(TYPE) * COUNT * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    vzalloc(
    - sizeof(THING) * (COUNT) * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    |
    vzalloc(
    - sizeof(THING) * (COUNT) * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    |
    vzalloc(
    - sizeof(THING) * COUNT * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    |
    vzalloc(
    - sizeof(THING) * COUNT * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    )

    // 3-factor product with 2 sizeof(variable), with redundant parens removed.
    @@
    expression THING1, THING2;
    identifier COUNT;
    type TYPE1, TYPE2;
    @@

    (
    vzalloc(
    - sizeof(TYPE1) * sizeof(TYPE2) * COUNT
    + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
    , ...)
    |
    vzalloc(
    - sizeof(TYPE1) * sizeof(THING2) * (COUNT)
    + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
    , ...)
    |
    vzalloc(
    - sizeof(THING1) * sizeof(THING2) * COUNT
    + array3_size(COUNT, sizeof(THING1), sizeof(THING2))
    , ...)
    |
    vzalloc(
    - sizeof(THING1) * sizeof(THING2) * (COUNT)
    + array3_size(COUNT, sizeof(THING1), sizeof(THING2))
    , ...)
    |
    vzalloc(
    - sizeof(TYPE1) * sizeof(THING2) * COUNT
    + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
    , ...)
    |
    vzalloc(
    - sizeof(TYPE1) * sizeof(THING2) * (COUNT)
    + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
    , ...)
    )

    // 3-factor product, only identifiers, with redundant parens removed.
    @@
    identifier STRIDE, SIZE, COUNT;
    @@

    (
    vzalloc(
    - (COUNT) * STRIDE * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    vzalloc(
    - COUNT * (STRIDE) * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    vzalloc(
    - COUNT * STRIDE * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    vzalloc(
    - (COUNT) * (STRIDE) * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    vzalloc(
    - COUNT * (STRIDE) * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    vzalloc(
    - (COUNT) * STRIDE * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    vzalloc(
    - (COUNT) * (STRIDE) * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    vzalloc(
    - COUNT * STRIDE * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    )

    // Any remaining multi-factor products, first at least 3-factor products
    // when they're not all constants...
    @@
    expression E1, E2, E3;
    constant C1, C2, C3;
    @@

    (
    vzalloc(C1 * C2 * C3, ...)
    |
    vzalloc(
    - E1 * E2 * E3
    + array3_size(E1, E2, E3)
    , ...)
    )

    // And then all remaining 2 factors products when they're not all constants.
    @@
    expression E1, E2;
    constant C1, C2;
    @@

    (
    vzalloc(C1 * C2, ...)
    |
    vzalloc(
    - E1 * E2
    + array_size(E1, E2)
    , ...)
    )

    Signed-off-by: Kees Cook

    Kees Cook
     
  • The vmalloc() function has no 2-factor argument form, so multiplication
    factors need to be wrapped in array_size(). This patch replaces cases of:

    vmalloc(a * b)

    with:
    vmalloc(array_size(a, b))

    as well as handling cases of:

    vmalloc(a * b * c)

    with:

    vmalloc(array3_size(a, b, c))

    This does, however, attempt to ignore constant size factors like:

    vmalloc(4 * 1024)

    though any constants defined via macros get caught up in the conversion.

    Any factors with a sizeof() of "unsigned char", "char", and "u8" were
    dropped, since they're redundant.

    The Coccinelle script used for this was:

    // Fix redundant parens around sizeof().
    @@
    type TYPE;
    expression THING, E;
    @@

    (
    vmalloc(
    - (sizeof(TYPE)) * E
    + sizeof(TYPE) * E
    , ...)
    |
    vmalloc(
    - (sizeof(THING)) * E
    + sizeof(THING) * E
    , ...)
    )

    // Drop single-byte sizes and redundant parens.
    @@
    expression COUNT;
    typedef u8;
    typedef __u8;
    @@

    (
    vmalloc(
    - sizeof(u8) * (COUNT)
    + COUNT
    , ...)
    |
    vmalloc(
    - sizeof(__u8) * (COUNT)
    + COUNT
    , ...)
    |
    vmalloc(
    - sizeof(char) * (COUNT)
    + COUNT
    , ...)
    |
    vmalloc(
    - sizeof(unsigned char) * (COUNT)
    + COUNT
    , ...)
    |
    vmalloc(
    - sizeof(u8) * COUNT
    + COUNT
    , ...)
    |
    vmalloc(
    - sizeof(__u8) * COUNT
    + COUNT
    , ...)
    |
    vmalloc(
    - sizeof(char) * COUNT
    + COUNT
    , ...)
    |
    vmalloc(
    - sizeof(unsigned char) * COUNT
    + COUNT
    , ...)
    )

    // 2-factor product with sizeof(type/expression) and identifier or constant.
    @@
    type TYPE;
    expression THING;
    identifier COUNT_ID;
    constant COUNT_CONST;
    @@

    (
    vmalloc(
    - sizeof(TYPE) * (COUNT_ID)
    + array_size(COUNT_ID, sizeof(TYPE))
    , ...)
    |
    vmalloc(
    - sizeof(TYPE) * COUNT_ID
    + array_size(COUNT_ID, sizeof(TYPE))
    , ...)
    |
    vmalloc(
    - sizeof(TYPE) * (COUNT_CONST)
    + array_size(COUNT_CONST, sizeof(TYPE))
    , ...)
    |
    vmalloc(
    - sizeof(TYPE) * COUNT_CONST
    + array_size(COUNT_CONST, sizeof(TYPE))
    , ...)
    |
    vmalloc(
    - sizeof(THING) * (COUNT_ID)
    + array_size(COUNT_ID, sizeof(THING))
    , ...)
    |
    vmalloc(
    - sizeof(THING) * COUNT_ID
    + array_size(COUNT_ID, sizeof(THING))
    , ...)
    |
    vmalloc(
    - sizeof(THING) * (COUNT_CONST)
    + array_size(COUNT_CONST, sizeof(THING))
    , ...)
    |
    vmalloc(
    - sizeof(THING) * COUNT_CONST
    + array_size(COUNT_CONST, sizeof(THING))
    , ...)
    )

    // 2-factor product, only identifiers.
    @@
    identifier SIZE, COUNT;
    @@

    vmalloc(
    - SIZE * COUNT
    + array_size(COUNT, SIZE)
    , ...)

    // 3-factor product with 1 sizeof(type) or sizeof(expression), with
    // redundant parens removed.
    @@
    expression THING;
    identifier STRIDE, COUNT;
    type TYPE;
    @@

    (
    vmalloc(
    - sizeof(TYPE) * (COUNT) * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    vmalloc(
    - sizeof(TYPE) * (COUNT) * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    vmalloc(
    - sizeof(TYPE) * COUNT * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    vmalloc(
    - sizeof(TYPE) * COUNT * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(TYPE))
    , ...)
    |
    vmalloc(
    - sizeof(THING) * (COUNT) * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    |
    vmalloc(
    - sizeof(THING) * (COUNT) * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    |
    vmalloc(
    - sizeof(THING) * COUNT * (STRIDE)
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    |
    vmalloc(
    - sizeof(THING) * COUNT * STRIDE
    + array3_size(COUNT, STRIDE, sizeof(THING))
    , ...)
    )

    // 3-factor product with 2 sizeof(variable), with redundant parens removed.
    @@
    expression THING1, THING2;
    identifier COUNT;
    type TYPE1, TYPE2;
    @@

    (
    vmalloc(
    - sizeof(TYPE1) * sizeof(TYPE2) * COUNT
    + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
    , ...)
    |
    vmalloc(
    - sizeof(TYPE1) * sizeof(THING2) * (COUNT)
    + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
    , ...)
    |
    vmalloc(
    - sizeof(THING1) * sizeof(THING2) * COUNT
    + array3_size(COUNT, sizeof(THING1), sizeof(THING2))
    , ...)
    |
    vmalloc(
    - sizeof(THING1) * sizeof(THING2) * (COUNT)
    + array3_size(COUNT, sizeof(THING1), sizeof(THING2))
    , ...)
    |
    vmalloc(
    - sizeof(TYPE1) * sizeof(THING2) * COUNT
    + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
    , ...)
    |
    vmalloc(
    - sizeof(TYPE1) * sizeof(THING2) * (COUNT)
    + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
    , ...)
    )

    // 3-factor product, only identifiers, with redundant parens removed.
    @@
    identifier STRIDE, SIZE, COUNT;
    @@

    (
    vmalloc(
    - (COUNT) * STRIDE * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    vmalloc(
    - COUNT * (STRIDE) * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    vmalloc(
    - COUNT * STRIDE * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    vmalloc(
    - (COUNT) * (STRIDE) * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    vmalloc(
    - COUNT * (STRIDE) * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    vmalloc(
    - (COUNT) * STRIDE * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    vmalloc(
    - (COUNT) * (STRIDE) * (SIZE)
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    |
    vmalloc(
    - COUNT * STRIDE * SIZE
    + array3_size(COUNT, STRIDE, SIZE)
    , ...)
    )

    // Any remaining multi-factor products, first at least 3-factor products
    // when they're not all constants...
    @@
    expression E1, E2, E3;
    constant C1, C2, C3;
    @@

    (
    vmalloc(C1 * C2 * C3, ...)
    |
    vmalloc(
    - E1 * E2 * E3
    + array3_size(E1, E2, E3)
    , ...)
    )

    // And then all remaining 2 factors products when they're not all constants.
    @@
    expression E1, E2;
    constant C1, C2;
    @@

    (
    vmalloc(C1 * C2, ...)
    |
    vmalloc(
    - E1 * E2
    + array_size(E1, E2)
    , ...)
    )

    Signed-off-by: Kees Cook

    Kees Cook
     

11 Nov, 2017

4 commits


15 May, 2017

5 commits


04 May, 2017

2 commits

  • Depending on the passed @idle arg, there may be no need to calculate
    'nr_free' or 'nr_clean' respectively in free_target_met() and
    clean_target_met().

    Signed-off-by: Mike Snitzer

    Mike Snitzer
     
  • dm-cache's smq policy tries hard to do it's work during the idle periods
    when there is no IO. But if there are no idle periods (eg, a long fio
    run) we still need to allow some demotions and promotions to occur.

    To achieve this, pass @idle=true to queue_promotion()'s
    free_target_met() call so that free_target_met() doesn't short-circuit
    the possibility of demotion simply because it isn't an idle period.

    Fixes: b29d4986d0 ("dm cache: significant rework to leverage dm-bio-prison-v2")
    Reported-by: John Harrigan
    Signed-off-by: Joe Thornber
    Signed-off-by: Mike Snitzer

    Joe Thornber
     

31 Mar, 2017

1 commit


08 Mar, 2017

1 commit

  • The cache policy interfaces have been updated to work well with the new
    bio-prison v2 interface's ability to queue work immediately (promotion,
    demotion, etc) -- overriding benefit being reduced latency on processing
    IO through the cache. Previously such work would be left for the DM
    cache core to queue on various lists and then process in batches later
    -- this caused a serious delay in latency for IO driven by the cache.

    The background tracker code was factored out so that all cache policies
    can make use of it.

    Also, the "cleaner" policy has been removed and is now a variant of the
    smq policy that simply disallows migrations.

    Signed-off-by: Joe Thornber
    Signed-off-by: Mike Snitzer

    Joe Thornber
     

09 Dec, 2016

1 commit


22 Sep, 2016

2 commits

  • For smq the 32 bit 'hint' stores the multiqueue level that the entry
    should be stored in. If a different policy has been used previously,
    and then switched to smq, the hints will be invalid. In which case we
    used to put all entries in the bottom level of the multiqueue, and then
    redistribute. Redistribution is faster if we put entries with invalid
    hints in random levels initially.

    Signed-off-by: Joe Thornber
    Signed-off-by: Mike Snitzer

    Joe Thornber
     
  • It's far quicker to always delete the hint array and recreate with
    dm_array_new() because we avoid the copying caused by mutation.

    Also simplifies the policy interface, replacing the walk_hints() with
    the simpler get_hint().

    Signed-off-by: Joe Thornber
    Signed-off-by: Mike Snitzer

    Joe Thornber
     

11 Mar, 2016

2 commits


01 Nov, 2015

1 commit

  • ffs counts bit starting with 1 (for the least significant bit), __ffs
    counts bits starting with 0. This patch changes various occurrences of ffs
    to __ffs and removes subtraction of 1 from the result.

    Note that __ffs (unlike ffs) is not defined when called with zero
    argument, but it is not called with zero argument in any of these cases.

    Signed-off-by: Mikulas Patocka
    Signed-off-by: Mike Snitzer

    Mikulas Patocka
     

12 Aug, 2015

2 commits

  • We no longer sleep in any of the smq functions, so this can become a
    spinlock. Switching from mutex to spinlock improves performance when
    the fast cache device is a very low latency device (e.g. NVMe SSD).

    The switch to spinlock also allows for removal of the extra tick_lock;
    which is no longer needed since the main lock being a spinlock now
    fulfills the locking requirements needed by interrupt context.

    Signed-off-by: Joe Thornber
    Signed-off-by: Mike Snitzer

    Joe Thornber
     
  • When creating dm-cache with the default policy, it will call
    request_module("dm-cache-default") to register the default policy.
    But the "dm-cache-default" alias was left referring to the MQ policy.
    Fix this by moving the module alias to SMQ.

    Fixes: bccab6a0 (dm cache: switch the "default" cache replacement policy from mq to smq)
    Signed-off-by: Yi Zhang
    Signed-off-by: Mike Snitzer

    Yi Zhang
     

27 Jul, 2015

1 commit


26 Jun, 2015

1 commit


18 Jun, 2015

1 commit

  • The Stochastic multiqueue (SMQ) policy (vs MQ) offers the promise of
    less memory utilization, improved performance and increased adaptability
    in the face of changing workloads. SMQ also does not have any
    cumbersome tuning knobs.

    Users may switch from "mq" to "smq" simply by appropriately reloading a
    DM table that is using the cache target. Doing so will cause all of the
    mq policy's hints to be dropped. Also, performance of the cache may
    degrade slightly until smq recalculates the origin device's hotspots
    that should be cached.

    In the future the "mq" policy will just silently make use of "smq" and
    the mq code will be removed.

    Signed-off-by: Mike Snitzer
    Acked-by: Joe Thornber

    Mike Snitzer
     

12 Jun, 2015

2 commits

  • The policy tick() method is normally called from interrupt context.
    Both the mq and smq policies do some bottom half work for the tick
    method in their map functions. However if no IO is going through the
    cache, then that bottom half work doesn't occur. With these policies
    this means recently hit entries do not age and do not get written
    back as early as we'd like.

    Fix this by introducing a new 'can_block' parameter to the tick()
    method. When this is set the bottom half work occurs immediately.
    'can_block' is set when the tick method is called every second by the
    core target (not in interrupt context).

    Signed-off-by: Joe Thornber
    Signed-off-by: Mike Snitzer

    Joe Thornber
     
  • The stochastic-multi-queue (smq) policy addresses some of the problems
    with the current multiqueue (mq) policy.

    Memory usage
    ------------

    The mq policy uses a lot of memory; 88 bytes per cache block on a 64
    bit machine.

    SMQ uses 28bit indexes to implement it's data structures rather than
    pointers. It avoids storing an explicit hit count for each block. It
    has a 'hotspot' queue rather than a pre cache which uses a quarter of
    the entries (each hotspot block covers a larger area than a single
    cache block).

    All these mean smq uses ~25bytes per cache block. Still a lot of
    memory, but a substantial improvement nontheless.

    Level balancing
    ---------------

    MQ places entries in different levels of the multiqueue structures
    based on their hit count (~ln(hit count)). This means the bottom
    levels generally have the most entries, and the top ones have very
    few. Having unbalanced levels like this reduces the efficacy of the
    multiqueue.

    SMQ does not maintain a hit count, instead it swaps hit entries with
    the least recently used entry from the level above. The over all
    ordering being a side effect of this stochastic process. With this
    scheme we can decide how many entries occupy each multiqueue level,
    resulting in better promotion/demotion decisions.

    Adaptability
    ------------

    The MQ policy maintains a hit count for each cache block. For a
    different block to get promoted to the cache it's hit count has to
    exceed the lowest currently in the cache. This means it can take a
    long time for the cache to adapt between varying IO patterns.
    Periodically degrading the hit counts could help with this, but I
    haven't found a nice general solution.

    SMQ doesn't maintain hit counts, so a lot of this problem just goes
    away. In addition it tracks performance of the hotspot queue, which
    is used to decide which blocks to promote. If the hotspot queue is
    performing badly then it starts moving entries more quickly between
    levels. This lets it adapt to new IO patterns very quickly.

    Performance
    -----------

    In my tests SMQ shows substantially better performance than MQ. Once
    this matures a bit more I'm sure it'll become the default policy.

    Signed-off-by: Joe Thornber
    Signed-off-by: Mike Snitzer

    Joe Thornber