Blame view
block/bfq-iosched.h
36.7 KB
a497ee34a block: switch all... |
1 |
/* SPDX-License-Identifier: GPL-2.0-or-later */ |
ea25da480 block, bfq: split... |
2 3 4 |
/* * Header file for the BFQ I/O scheduler: data structures and * prototypes of interface functions among BFQ components. |
ea25da480 block, bfq: split... |
5 6 7 8 9 10 11 |
*/ #ifndef _BFQ_H #define _BFQ_H #include <linux/blktrace_api.h> #include <linux/hrtimer.h> #include <linux/blk-cgroup.h> |
1d156646e blk-cgroup: separ... |
12 |
#include "blk-cgroup-rwstat.h" |
ea25da480 block, bfq: split... |
13 14 15 16 17 18 19 20 21 22 23 24 |
/* Compile-time constants and defaults shared by the BFQ components. */

/* number of ioprio classes; sizes bfq_sched_data's service_tree[] array */
#define BFQ_IOPRIO_CLASSES	3
/*
 * HZ/5 jiffies.
 * NOTE(review): presumably the timeout checked against
 * bfq_class_idle_last_service -- confirm against the users.
 */
#define BFQ_CL_IDLE_TIMEOUT	(HZ/5)

/* smallest and largest weight values */
#define BFQ_MIN_WEIGHT			1
#define BFQ_MAX_WEIGHT			1000
/* NOTE(review): presumably the ioprio-to-weight scale factor -- confirm */
#define BFQ_WEIGHT_CONVERSION_COEFF	10

/* default ioprio for a queue */
#define BFQ_DEFAULT_QUEUE_IOPRIO	4

/* defaults for groups: legacy weight, ioprio and ioprio class */
#define BFQ_WEIGHT_LEGACY_DFL	100
#define BFQ_DEFAULT_GRP_IOPRIO	0
#define BFQ_DEFAULT_GRP_CLASS	IOPRIO_CLASS_BE
1e66413c4 block, bfq: print... |
25 |
/*
 * Length bound for a pid rendered as a string.
 * NOTE(review): presumably sizes the buffer used when a queue's pid is
 * printed for logging -- confirm against the users.
 */
#define MAX_PID_STR_LENGTH 12
ea25da480 block, bfq: split... |
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
/* * Soft real-time applications are extremely more latency sensitive * than interactive ones. Over-raise the weight of the former to * privilege them against the latter. */ #define BFQ_SOFTRT_WEIGHT_FACTOR 100 struct bfq_entity; /** * struct bfq_service_tree - per ioprio_class service tree. * * Each service tree represents a B-WF2Q+ scheduler on its own. Each * ioprio_class has its own independent scheduler, and so its own * bfq_service_tree. All the fields are protected by the queue lock * of the containing bfqd. */ struct bfq_service_tree { /* tree for active entities (i.e., those backlogged) */ struct rb_root active; |
38c914074 bfq: fix typos in... |
46 |
/* tree for idle entities (i.e., not backlogged, with V < F_i)*/ |
ea25da480 block, bfq: split... |
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
struct rb_root idle; /* idle entity with minimum F_i */ struct bfq_entity *first_idle; /* idle entity with maximum F_i */ struct bfq_entity *last_idle; /* scheduler virtual time */ u64 vtime; /* scheduler weight sum; active and idle entities contribute to it */ unsigned long wsum; }; /** * struct bfq_sched_data - multi-class scheduler. * * bfq_sched_data is the basic scheduler queue. It supports three * ioprio_classes, and can be used either as a toplevel queue or as an |
46d556e6a block, bfq: consi... |
65 |
* intermediate queue in a hierarchical setup. |
ea25da480 block, bfq: split... |
66 67 68 69 70 71 |
* * The supported ioprio_classes are the same as in CFQ, in descending * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. * Requests from higher priority queues are served before all the * requests from lower priority queues; among requests of the same * queue requests are served according to B-WF2Q+. |
46d556e6a block, bfq: consi... |
72 73 74 75 76 77 78 79 80 81 82 |
* * The schedule is implemented by the service trees, plus the field * @next_in_service, which points to the entity on the active trees * that will be served next, if 1) no change in the schedule occurs * before the current in-service entity is expired, 2) the in-service * queue becomes idle when it expires, and 3) if the entity pointed to by * in_service_entity is not a queue, then the in-service child entity * of the entity pointed to by in_service_entity becomes idle on * expiration. This peculiar definition allows for the following * optimization, not yet exploited: while a given entity is still in * service, we already know which is the best candidate for next |
636b8fe86 block, bfq: fix s... |
83 |
* service among the other active entities in the same parent |
46d556e6a block, bfq: consi... |
84 85 86 87 |
* entity. We can then quickly compare the timestamps of the * in-service entity with those of such best candidate. * * All fields are protected by the lock of the containing bfqd. |
ea25da480 block, bfq: split... |
88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
*/ struct bfq_sched_data { /* entity in service */ struct bfq_entity *in_service_entity; /* head-of-line entity (see comments above) */ struct bfq_entity *next_in_service; /* array of service trees, one per ioprio_class */ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES]; /* last time CLASS_IDLE was served */ unsigned long bfq_class_idle_last_service; }; /** |
2d29c9f89 block, bfq: impro... |
102 |
* struct bfq_weight_counter - counter of the number of all active queues |
ea25da480 block, bfq: split... |
103 104 105 |
* with a given weight. */ struct bfq_weight_counter { |
2d29c9f89 block, bfq: impro... |
106 107 |
unsigned int weight; /* weight of the queues this counter refers to */ unsigned int num_active; /* nr of active queues with this weight */ |
ea25da480 block, bfq: split... |
108 |
/* |
2d29c9f89 block, bfq: impro... |
109 |
* Weights tree member (see bfq_data's @queue_weights_tree) |
ea25da480 block, bfq: split... |
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
*/ struct rb_node weights_node; }; /** * struct bfq_entity - schedulable entity. * * A bfq_entity is used to represent either a bfq_queue (leaf node in the * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each * entity belongs to the sched_data of the parent group in the cgroup * hierarchy. Non-leaf entities have also their own sched_data, stored * in @my_sched_data. * * Each entity stores independently its priority values; this would * allow different weights on different devices, but this * functionality is not exported to userspace by now. Priorities and * weights are updated lazily, first storing the new values into the * new_* fields, then setting the @prio_changed flag. As soon as * there is a transition in the entity state that allows the priority * update to take place the effective and the requested priority * values are synchronized. * * Unless cgroups are used, the weight value is calculated from the * ioprio to export the same interface as CFQ. When dealing with |
636b8fe86 block, bfq: fix s... |
134 |
* "well-behaved" queues (i.e., queues that do not spend too much |
ea25da480 block, bfq: split... |
135 136 137 138 139 140 141 142 143 |
* time to consume their budget and have true sequential behavior, and * when there are no external factors breaking anticipation) the * relative weights at each level of the cgroups hierarchy should be * guaranteed. All the fields are protected by the queue lock of the * containing bfqd. */ struct bfq_entity { /* service_tree member */ struct rb_node rb_node; |
ea25da480 block, bfq: split... |
144 145 146 147 148 |
/* * Flag, true if the entity is on a tree (either the active or * the idle one of its service_tree) or is in service. */ |
33a16a980 block, bfq: exten... |
149 |
bool on_st_or_in_serv; |
ea25da480 block, bfq: split... |
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
/* B-WF2Q+ start and finish timestamps [sectors/weight] */ u64 start, finish; /* tree the entity is enqueued into; %NULL if not on a tree */ struct rb_root *tree; /* * minimum start time of the (active) subtree rooted at this * entity; used for O(log N) lookups into active trees */ u64 min_start; /* amount of service received during the last service slot */ int service; /* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */ int budget; |
795fe54c2 bfq: Add per-devi... |
168 169 170 |
/* device weight, if non-zero, it overrides the default weight of * bfq_group_data */ int dev_weight; |
ea25da480 block, bfq: split... |
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 |
/* weight of the queue */ int weight; /* next weight if a change is in progress */ int new_weight; /* original weight, used to implement weight boosting */ int orig_weight; /* parent entity, for hierarchical scheduling */ struct bfq_entity *parent; /* * For non-leaf nodes in the hierarchy, the associated * scheduler queue, %NULL on leaf nodes. */ struct bfq_sched_data *my_sched_data; /* the scheduler queue this entity belongs to */ struct bfq_sched_data *sched_data; /* flag, set to request a weight, ioprio or ioprio_class change */ int prio_changed; |
ba7aeae55 block, bfq: fix d... |
192 193 194 |
/* flag, set if the entity is counted in groups_with_pending_reqs */ bool in_groups_with_pending_reqs; |
ea25da480 block, bfq: split... |
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 |
}; struct bfq_group; /** * struct bfq_ttime - per process thinktime stats. */ struct bfq_ttime { /* completion time of the last request */ u64 last_end_request; /* total process thinktime */ u64 ttime_total; /* number of thinktime samples */ unsigned long ttime_samples; /* average process thinktime */ u64 ttime_mean; }; /** * struct bfq_queue - leaf schedulable entity. * * A bfq_queue is a leaf request queue; it can be associated with an * io_context or more, if it is async or shared between cooperating * processes. @cgroup holds a reference to the cgroup, to be sure that it * does not disappear while a bfqq still references it (mostly to avoid * races between request issuing and task migration followed by cgroup * destruction). * All the fields are protected by the queue lock of the containing bfqd. */ struct bfq_queue { /* reference counter */ int ref; /* parent bfq_data */ struct bfq_data *bfqd; /* current ioprio and ioprio class */ unsigned short ioprio, ioprio_class; /* next ioprio and ioprio class if a change is in progress */ unsigned short new_ioprio, new_ioprio_class; |
2341d662e block, bfq: tune ... |
235 236 237 238 239 240 |
/* last total-service-time sample, see bfq_update_inject_limit() */ u64 last_serv_time_ns; /* limit for request injection */ unsigned int inject_limit; /* last time the inject limit has been decreased, in jiffies */ unsigned long decrease_time_jif; |
ea25da480 block, bfq: split... |
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 |
/* * Shared bfq_queue if queue is cooperating with one or more * other queues. */ struct bfq_queue *new_bfqq; /* request-position tree member (see bfq_group's @rq_pos_tree) */ struct rb_node pos_node; /* request-position tree root (see bfq_group's @rq_pos_tree) */ struct rb_root *pos_root; /* sorted list of pending requests */ struct rb_root sort_list; /* if fifo isn't expired, next request to serve */ struct request *next_rq; /* number of sync and async requests queued */ int queued[2]; /* number of requests currently allocated */ int allocated; /* number of pending metadata requests */ int meta_pending; /* fifo list of requests in sort_list */ struct list_head fifo; /* entity representing this queue in the scheduler */ struct bfq_entity entity; |
2d29c9f89 block, bfq: impro... |
266 267 |
/* pointer to the weight counter associated with this entity */ struct bfq_weight_counter *weight_counter; |
ea25da480 block, bfq: split... |
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 |
/* maximum budget allowed from the feedback mechanism */ int max_budget; /* budget expiration (in jiffies) */ unsigned long budget_timeout; /* number of requests on the dispatch list or inside driver */ int dispatched; /* status flags */ unsigned long flags; /* node for active/idle bfqq list inside parent bfqd */ struct list_head bfqq_list; /* associated @bfq_ttime struct */ struct bfq_ttime ttime; /* bit vector: a 1 for each seeky requests in history */ u32 seek_history; /* node for the device's burst list */ struct hlist_node burst_list_node; /* position of the last request enqueued */ sector_t last_request_pos; /* Number of consecutive pairs of request completion and * arrival, such that the queue becomes idle after the * completion, but the next request arrives within an idle * time slice; used only if the queue's IO_bound flag has been * cleared. */ unsigned int requests_within_timer; /* pid of the process owning the queue, used for logging purposes */ pid_t pid; /* * Pointer to the bfq_io_cq owning the bfq_queue, set to %NULL * if the queue is shared. */ struct bfq_io_cq *bic; /* current maximum weight-raising time for this queue */ unsigned long wr_cur_max_time; /* * Minimum time instant such that, only if a new request is * enqueued after this time instant in an idle @bfq_queue with * no outstanding requests, then the task associated with the * queue it is deemed as soft real-time (see the comments on * the function bfq_bfqq_softrt_next_start()) */ unsigned long soft_rt_next_start; /* * Start time of the current weight-raising period if * the @bfq-queue is being weight-raised, otherwise * finish time of the last weight-raising period. */ unsigned long last_wr_start_finish; /* factor by which the weight of this queue is multiplied */ unsigned int wr_coeff; /* * Time of the last transition of the @bfq_queue from idle to * backlogged. 
*/ unsigned long last_idle_bklogged; /* * Cumulative service received from the @bfq_queue since the * last transition from idle to backlogged. */ unsigned long service_from_backlogged; |
8a8747dc0 block, bfq: limit... |
339 340 341 342 343 |
/* * Cumulative service received from the @bfq_queue since its * last transition to weight-raised state. */ unsigned long service_from_wr; |
ea25da480 block, bfq: split... |
344 345 346 347 348 349 350 |
/* * Value of wr start time when switching to soft rt */ unsigned long wr_start_at_switch_to_srt; unsigned long split_time; /* time of last split */ |
7b8fa3b90 block, bfq: let a... |
351 352 |
unsigned long first_IO_time; /* time of first I/O for this queue */ |
d0edc2473 block, bfq: injec... |
353 354 355 |
/* max service rate measured so far */ u32 max_service_rate; |
13a857a4c block, bfq: detec... |
356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 |
/* * Pointer to the waker queue for this queue, i.e., to the * queue Q such that this queue happens to get new I/O right * after some I/O request of Q is completed. For details, see * the comments on the choice of the queue for injection in * bfq_select_queue(). */ struct bfq_queue *waker_bfqq; /* node for woken_list, see below */ struct hlist_node woken_list_node; /* * Head of the list of the woken queues for this queue, i.e., * of the list of the queues for which this queue is a waker * queue. This list is used to reset the waker_bfqq pointer in * the woken queues when this queue exits. */ struct hlist_head woken_list; |
ea25da480 block, bfq: split... |
374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 |
}; /** * struct bfq_io_cq - per (request_queue, io_context) structure. */ struct bfq_io_cq { /* associated io_cq structure */ struct io_cq icq; /* must be the first member */ /* array of two process queues, the sync and the async */ struct bfq_queue *bfqq[2]; /* per (request_queue, blkcg) ioprio */ int ioprio; #ifdef CONFIG_BFQ_GROUP_IOSCHED uint64_t blkcg_serial_nr; /* the current blkcg serial */ #endif /* |
d5be3fefc block,bfq: refact... |
390 391 392 |
* Snapshot of the has_short_ttime flag before merging; taken * to remember its value while the queue is merged, so as to * be able to restore it in case of split. |
ea25da480 block, bfq: split... |
393 |
*/ |
d5be3fefc block,bfq: refact... |
394 |
bool saved_has_short_ttime; |
ea25da480 block, bfq: split... |
395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 |
/* * Same purpose as the previous two fields for the I/O bound * classification of a queue. */ bool saved_IO_bound; /* * Same purpose as the previous fields for the value of the * field keeping the queue's belonging to a large burst */ bool saved_in_large_burst; /* * True if the queue belonged to a burst list before its merge * with another cooperating queue. */ bool was_in_burst_list; /* |
fffca087d block, bfq: save ... |
413 414 415 416 417 418 419 420 421 |
* Save the weight when a merge occurs, to be able * to restore it in case of split. If the weight is not * correctly resumed when the queue is recycled, * then the weight of the recycled queue could differ * from the weight of the original queue. */ unsigned int saved_weight; /* |
ea25da480 block, bfq: split... |
422 423 424 425 426 427 428 429 |
* Similar to previous fields: save wr information. */ unsigned long saved_wr_coeff; unsigned long saved_last_wr_start_finish; unsigned long saved_wr_start_at_switch_to_srt; unsigned int saved_wr_cur_max_time; struct bfq_ttime saved_ttime; }; |
ea25da480 block, bfq: split... |
430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 |
/** * struct bfq_data - per-device data structure. * * All the fields are protected by @lock. */ struct bfq_data { /* device request queue */ struct request_queue *queue; /* dispatch queue */ struct list_head dispatch; /* root bfq_group for the device */ struct bfq_group *root_group; /* * rbtree of weight counters of @bfq_queues, sorted by * weight. Used to keep track of whether all @bfq_queues have * the same weight. The tree contains one counter for each * distinct weight associated to some active and not * weight-raised @bfq_queue (see the comments to the functions * bfq_weights_tree_[add|remove] for further details). */ |
fb53ac6cd block, bfq: do no... |
452 |
struct rb_root_cached queue_weights_tree; |
ba7aeae55 block, bfq: fix d... |
453 |
|
ea25da480 block, bfq: split... |
454 |
/* |
ba7aeae55 block, bfq: fix d... |
455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 |
* Number of groups with at least one descendant process that * has at least one request waiting for completion. Note that * this also accounts for requests already dispatched, but not * yet completed. Therefore this number of groups may differ from * (be larger than) the number of active groups, as a group is * considered active only if its corresponding entity has * descendant queues with at least one request queued. This * number is used to decide whether a scenario is symmetric. * For a detailed explanation see comments on the computation * of the variable asymmetric_scenario in the function * bfq_better_to_idle(). * * However, it is hard to compute this number exactly, for * groups with multiple descendant processes. Consider a group * that is inactive, i.e., that has no descendant process with * pending I/O inside BFQ queues. Then suppose that * num_groups_with_pending_reqs is still accounting for this * group, because the group has descendant processes with some * I/O request still in flight. num_groups_with_pending_reqs * should be decremented when the in-flight request of the * last descendant process is finally completed (assuming that * nothing else has changed for the group in the meantime, in * terms of composition of the group and active/inactive state of child * groups and processes). To accomplish this, an additional * pending-request counter would have to be added to entities, and would * have to be updated correctly. To avoid this additional field and operations, * we resort to the following tradeoff between simplicity and * accuracy: for an inactive group that is still counted in * num_groups_with_pending_reqs, we decrement * num_groups_with_pending_reqs when the first descendant * process of the group remains with no request waiting for * completion.
* * Even this simpler decrement strategy requires some * care: to avoid multiple decrements, we flag a group, * more precisely an entity representing a group, as still * counted in num_groups_with_pending_reqs when it becomes * inactive. Then, when the first descendant queue of the * entity remains with no request waiting for completion, * num_groups_with_pending_reqs is decremented, and this flag * is reset. After this flag is reset for the entity, * num_groups_with_pending_reqs won't be decremented any * longer in case a new descendant queue of the entity remains * with no request waiting for completion. |
ea25da480 block, bfq: split... |
499 |
*/ |
ba7aeae55 block, bfq: fix d... |
500 |
unsigned int num_groups_with_pending_reqs; |
ea25da480 block, bfq: split... |
501 502 |
/* |
73d581184 block, bfq: consi... |
503 504 505 |
* Per-class (RT, BE, IDLE) number of bfq_queues containing * requests (including the queue in service, even if it is * idling). |
ea25da480 block, bfq: split... |
506 |
*/ |
73d581184 block, bfq: consi... |
507 |
unsigned int busy_queues[3]; |
ea25da480 block, bfq: split... |
508 509 510 511 512 513 |
/* number of weight-raised busy @bfq_queues */ int wr_busy_queues; /* number of queued requests */ int queued; /* number of requests dispatched and waiting for completion */ int rq_in_driver; |
8cacc5ab3 block, bfq: do no... |
514 515 |
/* true if the device is non rotational and performs queueing */ bool nonrot_with_queueing; |
ea25da480 block, bfq: split... |
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 |
/* * Maximum number of requests in driver in the last * @hw_tag_samples completed requests. */ int max_rq_in_driver; /* number of samples used to calculate hw_tag */ int hw_tag_samples; /* flag set to one if the driver is showing a queueing behavior */ int hw_tag; /* number of budgets assigned */ int budgets_assigned; /* * Timer set when idling (waiting) for the next request from * the queue in service. */ struct hrtimer idle_slice_timer; /* bfq_queue in service */ struct bfq_queue *in_service_queue; /* on-disk position of the last served request */ sector_t last_position; |
058fdecc6 block, bfq: fix i... |
540 541 |
/* position of the last served request for the in-service queue */ sector_t in_serv_last_pos; |
ea25da480 block, bfq: split... |
542 543 |
/* time of last request completion (ns) */ u64 last_completion; |
13a857a4c block, bfq: detec... |
544 545 |
/* bfqq owning the last completed rq */ struct bfq_queue *last_completed_rq_bfqq; |
2341d662e block, bfq: tune ... |
546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 |
/* time of last transition from empty to non-empty (ns) */ u64 last_empty_occupied_ns; /* * Flag set to activate the sampling of the total service time * of a just-arrived first I/O request (see * bfq_update_inject_limit()). This will cause the setting of * waited_rq when the request is finally dispatched. */ bool wait_dispatch; /* * If set, then bfq_update_inject_limit() is invoked when * waited_rq is eventually completed. */ struct request *waited_rq; /* * True if some request has been injected during the last service hole. */ bool rqs_injected; |
ea25da480 block, bfq: split... |
565 566 567 568 569 570 571 572 573 |
/* time of first rq dispatch in current observation interval (ns) */ u64 first_dispatch; /* time of last rq dispatch in current observation interval (ns) */ u64 last_dispatch; /* beginning of the last budget */ ktime_t last_budget_start; /* beginning of the last idle slice */ ktime_t last_idling_start; |
2341d662e block, bfq: tune ... |
574 |
unsigned long last_idling_start_jiffies; |
ea25da480 block, bfq: split... |
575 576 577 578 579 580 581 582 583 584 585 586 587 |
/* number of samples in current observation interval */ int peak_rate_samples; /* num of samples of seq dispatches in current observation interval */ u32 sequential_samples; /* total num of sectors transferred in current observation interval */ u64 tot_sectors_dispatched; /* max rq size seen during current observation interval (sectors) */ u32 last_rq_max_size; /* time elapsed from first dispatch in current observ. interval (us) */ u64 delta_from_first; /* * Current estimate of the device peak rate, measured in |
bc56e2caf block, bfq: lower... |
588 |
* [(sectors/usec) / 2^BFQ_RATE_SHIFT]. The left-shift by |
ea25da480 block, bfq: split... |
589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 |
* BFQ_RATE_SHIFT is performed to increase precision in * fixed-point calculations. */ u32 peak_rate; /* maximum budget allotted to a bfq_queue before rescheduling */ int bfq_max_budget; /* list of all the bfq_queues active on the device */ struct list_head active_list; /* list of all the bfq_queues idle on the device */ struct list_head idle_list; /* * Timeout for async/sync requests; when it fires, requests * are served in fifo order. */ u64 bfq_fifo_expire[2]; /* weight of backward seeks wrt forward ones */ unsigned int bfq_back_penalty; /* maximum allowed backward seek */ unsigned int bfq_back_max; /* maximum idling time */ u32 bfq_slice_idle; /* user-configured max budget value (0 for auto-tuning) */ int bfq_user_max_budget; /* * Timeout for bfq_queues to consume their budget; used to * prevent seeky queues from imposing long latencies to * sequential or quasi-sequential ones (this also implies that * seeky queues cannot receive guarantees in the service * domain; after a timeout they are charged for the time they * have been in service, to preserve fairness among them, but * without service-domain guarantees). */ unsigned int bfq_timeout; /* * Number of consecutive requests that must be issued within * the idle time slice to set again idling to a queue which * was marked as non-I/O-bound (see the definition of the * IO_bound flag for further details). */ unsigned int bfq_requests_within_timer; /* * Force device idling whenever needed to provide accurate * service guarantees, without caring about throughput * issues. CAVEAT: this may even increase latencies, in case * of useless idling for processes that did stop doing I/O. */ bool strict_guarantees; /* * Last time at which a queue entered the current burst of * queues being activated shortly after each other; for more * details about this and the following parameters related to * a burst of activations, see the comments on the function * bfq_handle_burst. 
*/ unsigned long last_ins_in_burst; /* * Reference time interval used to decide whether a queue has * been activated shortly after @last_ins_in_burst. */ unsigned long bfq_burst_interval; /* number of queues in the current burst of queue activations */ int burst_size; /* common parent entity for the queues in the burst */ struct bfq_entity *burst_parent_entity; /* Maximum burst size above which the current queue-activation * burst is deemed as 'large'. */ unsigned long bfq_large_burst_thresh; /* true if a large queue-activation burst is in progress */ bool large_burst; /* * Head of the burst list (as for the above fields, more * details in the comments on the function bfq_handle_burst). */ struct hlist_head burst_list; /* if set to true, low-latency heuristics are enabled */ bool low_latency; /* * Maximum factor by which the weight of a weight-raised queue * is multiplied. */ unsigned int bfq_wr_coeff; /* maximum duration of a weight-raising period (jiffies) */ unsigned int bfq_wr_max_time; /* Maximum weight-raising duration for soft real-time processes */ unsigned int bfq_wr_rt_max_time; /* * Minimum idle period after which weight-raising may be * reactivated for a queue (in jiffies). */ unsigned int bfq_wr_min_idle_time; /* * Minimum period between request arrivals after which * weight-raising may be reactivated for an already busy async * queue (in jiffies). */ unsigned long bfq_wr_min_inter_arr_async; /* Max service-rate for a soft real-time queue, in sectors/sec */ unsigned int bfq_wr_max_softrt_rate; /* |
e24f1c245 block, bfq: remov... |
700 701 702 |
* Cached value of the product ref_rate*ref_wr_duration, used * for computing the maximum duration of weight raising * automatically. |
ea25da480 block, bfq: split... |
703 |
*/ |
e24f1c245 block, bfq: remov... |
704 |
u64 rate_dur_prod; |
ea25da480 block, bfq: split... |
705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 |
/* fallback dummy bfqq for extreme OOM conditions */ struct bfq_queue oom_bfqq; spinlock_t lock; /* * bic associated with the task issuing current bio for * merging. This and the next field are used as a support to * be able to perform the bic lookup, needed by bio-merge * functions, before the scheduler lock is taken, and thus * avoid taking the request-queue lock while the scheduler * lock is being held. */ struct bfq_io_cq *bio_bic; /* bfqq associated with the task issuing current bio for merging */ struct bfq_queue *bio_bfqq; |
a52a69ea8 block, bfq: limit... |
722 723 |
/* |
a52a69ea8 block, bfq: limit... |
724 725 726 727 |
* Depth limits used in bfq_limit_depth (see comments on the * function) */ unsigned int word_depths[2][2]; |
ea25da480 block, bfq: split... |
728 729 730 731 732 733 734 735 736 737 738 |
}; enum bfqq_state_flags { BFQQF_just_created = 0, /* queue just allocated */ BFQQF_busy, /* has requests or is in service */ BFQQF_wait_request, /* waiting for a request */ BFQQF_non_blocking_wait_rq, /* * waiting for a request * without idling the device */ BFQQF_fifo_expire, /* FIFO checked in this slice */ |
d5be3fefc block,bfq: refact... |
739 |
BFQQF_has_short_ttime, /* queue has a short think time */ |
ea25da480 block, bfq: split... |
740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 |
BFQQF_sync, /* synchronous queue */ BFQQF_IO_bound, /* * bfqq has timed-out at least once * having consumed at most 2/10 of * its budget */ BFQQF_in_large_burst, /* * bfqq activated in a large burst, * see comments to bfq_handle_burst. */ BFQQF_softrt_update, /* * may need softrt-next-start * update */ BFQQF_coop, /* bfqq is shared */ |
13a857a4c block, bfq: detec... |
755 756 |
BFQQF_split_coop, /* shared bfqq will be split */ BFQQF_has_waker /* bfqq has a waker queue */ |
ea25da480 block, bfq: split... |
757 758 759 760 761 762 763 764 765 766 767 768 |
}; #define BFQ_BFQQ_FNS(name) \ void bfq_mark_bfqq_##name(struct bfq_queue *bfqq); \ void bfq_clear_bfqq_##name(struct bfq_queue *bfqq); \ int bfq_bfqq_##name(const struct bfq_queue *bfqq); BFQ_BFQQ_FNS(just_created); BFQ_BFQQ_FNS(busy); BFQ_BFQQ_FNS(wait_request); BFQ_BFQQ_FNS(non_blocking_wait_rq); BFQ_BFQQ_FNS(fifo_expire); |
d5be3fefc block,bfq: refact... |
769 |
BFQ_BFQQ_FNS(has_short_ttime); |
ea25da480 block, bfq: split... |
770 771 772 773 774 775 |
BFQ_BFQQ_FNS(sync); BFQ_BFQQ_FNS(IO_bound); BFQ_BFQQ_FNS(in_large_burst); BFQ_BFQQ_FNS(coop); BFQ_BFQQ_FNS(split_coop); BFQ_BFQQ_FNS(softrt_update); |
13a857a4c block, bfq: detec... |
776 |
BFQ_BFQQ_FNS(has_waker); |
ea25da480 block, bfq: split... |
777 778 779 780 781 782 783 784 785 786 787 788 789 |
#undef BFQ_BFQQ_FNS /* Expiration reasons. */ enum bfqq_expiration { BFQQE_TOO_IDLE = 0, /* * queue has been idling for * too long */ BFQQE_BUDGET_TIMEOUT, /* budget took too long to be used */ BFQQE_BUDGET_EXHAUSTED, /* budget consumed */ BFQQE_NO_MORE_REQUESTS, /* the queue has no more requests */ BFQQE_PREEMPTED /* preemption in progress */ }; |
c0ce79dca blk-cgroup: move ... |
790 791 792 793 |
/*
 * Counter type used for the debug statistics kept in struct bfqg_stats.
 * Member order is left as it was, since it determines the struct layout.
 */
struct bfq_stat {
	/* per-CPU counter part */
	struct percpu_counter		cpu_cnt;
	/*
	 * Auxiliary 64-bit atomic count.
	 * NOTE(review): its exact role is not visible in this header --
	 * confirm against the code that updates it.
	 */
	atomic64_t			aux_cnt;
};
ea25da480 block, bfq: split... |
794 |
struct bfqg_stats { |
fd41e6033 bfq-iosched: stop... |
795 796 797 |
/* basic stats */ struct blkg_rwstat bytes; struct blkg_rwstat ios; |
8060c47ba block: rename CON... |
798 |
#ifdef CONFIG_BFQ_CGROUP_DEBUG |
ea25da480 block, bfq: split... |
799 800 801 802 803 804 805 806 807 |
/* number of ios merged */ struct blkg_rwstat merged; /* total time spent on device in ns, may not be accurate w/ queueing */ struct blkg_rwstat service_time; /* total time spent waiting in scheduler queue in ns */ struct blkg_rwstat wait_time; /* number of IOs queued up */ struct blkg_rwstat queued; /* total disk time and nr sectors dispatched by this group */ |
c0ce79dca blk-cgroup: move ... |
808 |
struct bfq_stat time; |
ea25da480 block, bfq: split... |
809 |
/* sum of number of ios queued across all samples */ |
c0ce79dca blk-cgroup: move ... |
810 |
struct bfq_stat avg_queue_size_sum; |
ea25da480 block, bfq: split... |
811 |
/* count of samples taken for average */ |
c0ce79dca blk-cgroup: move ... |
812 |
struct bfq_stat avg_queue_size_samples; |
ea25da480 block, bfq: split... |
813 |
/* how many times this group has been removed from service tree */ |
c0ce79dca blk-cgroup: move ... |
814 |
struct bfq_stat dequeue; |
ea25da480 block, bfq: split... |
815 |
/* total time spent waiting for it to be assigned a timeslice. */ |
c0ce79dca blk-cgroup: move ... |
816 |
struct bfq_stat group_wait_time; |
ea25da480 block, bfq: split... |
817 |
/* time spent idling for this blkcg_gq */ |
c0ce79dca blk-cgroup: move ... |
818 |
struct bfq_stat idle_time; |
ea25da480 block, bfq: split... |
819 |
/* total time with empty current active q with other requests queued */ |
c0ce79dca blk-cgroup: move ... |
820 |
struct bfq_stat empty_time; |
ea25da480 block, bfq: split... |
821 |
/* fields after this shouldn't be cleared on stat reset */ |
84c7afceb block: use ktime_... |
822 823 824 |
u64 start_group_wait_time; u64 start_idle_time; u64 start_empty_time; |
ea25da480 block, bfq: split... |
825 |
uint16_t flags; |
8060c47ba block: rename CON... |
826 |
#endif /* CONFIG_BFQ_CGROUP_DEBUG */ |
ea25da480 block, bfq: split... |
827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 |
}; #ifdef CONFIG_BFQ_GROUP_IOSCHED /** * struct bfq_group_data - per-blkcg storage for the blkio subsystem. * * @pd: &struct blkcg_policy_data that this structure inherits * @weight: weight of the bfq_group */ struct bfq_group_data { /* must be the first member */ struct blkcg_policy_data pd; unsigned int weight; }; /** * struct bfq_group - per (device, cgroup) data structure. * @entity: schedulable entity to insert into the parent group sched_data. * @sched_data: own sched_data, to contain child entities (they may be * both bfq_queues and bfq_groups). * @bfqd: the bfq_data for the device this group acts upon. * @async_bfqq: array of async queues for all the tasks belonging to * the group, one queue per ioprio value per ioprio_class, * except for the idle class that has only one queue. * @async_idle_bfqq: async queue for the idle class (ioprio is ignored). * @my_entity: pointer to @entity, %NULL for the toplevel group; used * to avoid too many special cases during group creation/ * migration. * @stats: stats for this bfqg. * @active_entities: number of active entities belonging to the group; * unused for the root group. Used to know whether there * are groups with more than one active @bfq_entity * (see the comments to the function * bfq_bfqq_may_idle()). * @rq_pos_tree: rbtree sorted by next_request position, used when * determining if two or more queues have interleaving * requests (see bfq_find_close_cooperator()). * * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup * there is a set of bfq_groups, each one collecting the lower-level * entities belonging to the group that are acting on the same device. * * Locking works as follows: * o @bfqd is protected by the queue lock, RCU is used to access it * from the readers. * o All the other fields are protected by the @bfqd queue lock. */ struct bfq_group { /* must be the first member */ struct blkg_policy_data pd; |
8f9bebc33 block, bfq: acces... |
879 880 881 882 883 |
/* cached path for this blkg (see comments in bfq_bic_update_cgroup) */ char blkg_path[128]; /* reference counter (see comments in bfq_bic_update_cgroup) */ int ref; |
ea25da480 block, bfq: split... |
884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 |
struct bfq_entity entity; struct bfq_sched_data sched_data; void *bfqd; struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; struct bfq_queue *async_idle_bfqq; struct bfq_entity *my_entity; int active_entities; struct rb_root rq_pos_tree; struct bfqg_stats stats; }; #else struct bfq_group { |
4d8340d0d block, bfq: remov... |
903 |
struct bfq_entity entity; |
ea25da480 block, bfq: split... |
904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 |
struct bfq_sched_data sched_data; struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; struct bfq_queue *async_idle_bfqq; struct rb_root rq_pos_tree; }; #endif struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity); /* --------------- main algorithm interface ----------------- */ #define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) extern const int bfq_timeout; struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync); void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync); struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic); |
ea25da480 block, bfq: split... |
925 |
void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq); |
2d29c9f89 block, bfq: impro... |
926 |
void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, |
fb53ac6cd block, bfq: do no... |
927 |
struct rb_root_cached *root); |
0471559c2 block, bfq: add/r... |
928 |
void __bfq_weights_tree_remove(struct bfq_data *bfqd, |
2d29c9f89 block, bfq: impro... |
929 |
struct bfq_queue *bfqq, |
fb53ac6cd block, bfq: do no... |
930 |
struct rb_root_cached *root); |
0471559c2 block, bfq: add/r... |
931 932 |
void bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_queue *bfqq); |
ea25da480 block, bfq: split... |
933 934 935 936 |
void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool compensate, enum bfqq_expiration reason); void bfq_put_queue(struct bfq_queue *bfqq); void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); |
c89977366 block, bfq: turn ... |
937 |
void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq); |
ea25da480 block, bfq: split... |
938 939 940 941 942 943 |
void bfq_schedule_dispatch(struct bfq_data *bfqd); void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); /* ------------ end of main algorithm interface -------------- */ /* ---------------- cgroups-support interface ---------------- */ |
fd41e6033 bfq-iosched: stop... |
944 |
void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq); |
ea25da480 block, bfq: split... |
945 946 947 948 |
void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, unsigned int op); void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op); void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op); |
84c7afceb block: use ktime_... |
949 950 |
void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, u64 io_start_time_ns, unsigned int op); |
ea25da480 block, bfq: split... |
951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 |
void bfqg_stats_update_dequeue(struct bfq_group *bfqg); void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg); void bfqg_stats_update_idle_time(struct bfq_group *bfqg); void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg); void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg); void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_group *bfqg); void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg); void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio); void bfq_end_wr_async(struct bfq_data *bfqd); struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg); struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); struct bfq_group *bfqq_group(struct bfq_queue *bfqq); struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node); |
8f9bebc33 block, bfq: acces... |
967 |
void bfqg_and_blkg_put(struct bfq_group *bfqg); |
ea25da480 block, bfq: split... |
968 969 |
#ifdef CONFIG_BFQ_GROUP_IOSCHED |
659b3394e bfq: fix compile ... |
970 971 |
extern struct cftype bfq_blkcg_legacy_files[]; extern struct cftype bfq_blkg_files[]; |
ea25da480 block, bfq: split... |
972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 |
extern struct blkcg_policy blkcg_policy_bfq; #endif /* ------------- end of cgroups-support interface ------------- */ /* - interface of the internal hierarchical B-WF2Q+ scheduler - */ #ifdef CONFIG_BFQ_GROUP_IOSCHED /* both next loops stop at one of the child entities of the root group */ #define for_each_entity(entity) \ for (; entity ; entity = entity->parent) /* * For each iteration, compute parent in advance, so as to be safe if * entity is deallocated during the iteration. Such a deallocation may * happen as a consequence of a bfq_put_queue that frees the bfq_queue * containing entity. */ #define for_each_entity_safe(entity, parent) \ for (; entity && ({ parent = entity->parent; 1; }); entity = parent) #else /* CONFIG_BFQ_GROUP_IOSCHED */ /* * Next two macros are fake loops when cgroups support is not * enabled. In fact, in such a case, there is only one level to go up * (to reach the root group). */ #define for_each_entity(entity) \ for (; entity ; entity = NULL) #define for_each_entity_safe(entity, parent) \ for (parent = NULL; entity ; entity = parent) #endif /* CONFIG_BFQ_GROUP_IOSCHED */ struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq); struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity); |
73d581184 block, bfq: consi... |
1008 |
unsigned int bfq_tot_busy_queues(struct bfq_data *bfqd); |
ea25da480 block, bfq: split... |
1009 1010 1011 1012 1013 1014 1015 |
struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity); struct bfq_entity *bfq_entity_of(struct rb_node *node); unsigned short bfq_ioprio_to_weight(int ioprio); void bfq_put_idle_entity(struct bfq_service_tree *st, struct bfq_entity *entity); struct bfq_service_tree * __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, |
431b17f9d block, bfq: don't... |
1016 1017 |
struct bfq_entity *entity, bool update_class_too); |
ea25da480 block, bfq: split... |
1018 1019 1020 1021 1022 1023 1024 |
void bfq_bfqq_served(struct bfq_queue *bfqq, int served); void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq, unsigned long time_ms); bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree); bool next_queue_may_preempt(struct bfq_data *bfqd); struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd); |
eed47d19d block, bfq: fix u... |
1025 |
bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd); |
ea25da480 block, bfq: split... |
1026 1027 1028 |
void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool ins_into_idle_tree, bool expiration); void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); |
80294c3bb block, bfq: make ... |
1029 1030 |
void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool expiration); |
ea25da480 block, bfq: split... |
1031 1032 1033 1034 1035 1036 1037 |
void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool expiration); void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq); /* --------------- end of interface of B-WF2Q+ ---------------- */ /* Logging facilities. */ |
1e66413c4 block, bfq: print... |
1038 1039 1040 1041 1042 1043 1044 |
/*
 * Format @pid into @str for use in log messages, writing at most @len
 * bytes including the terminating NUL. The value -1 is rendered as the
 * literal "SHARED-"; any other value is printed in decimal.
 */
static inline void bfq_pid_to_str(int pid, char *str, int len)
{
	if (pid == -1) {
		snprintf(str, len, "SHARED-");
		return;
	}

	snprintf(str, len, "%d", pid);
}
ea25da480 block, bfq: split... |
1045 1046 1047 1048 |
#ifdef CONFIG_BFQ_GROUP_IOSCHED struct bfq_group *bfqq_group(struct bfq_queue *bfqq); #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ |
1e66413c4 block, bfq: print... |
1049 |
char pid_str[MAX_PID_STR_LENGTH]; \ |
40d47c155 block,bfq: Skip t... |
1050 1051 |
if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \ break; \ |
1e66413c4 block, bfq: print... |
1052 |
bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \ |
35fe6d763 block: use standa... |
1053 1054 |
blk_add_cgroup_trace_msg((bfqd)->queue, \ bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \ |
1e66413c4 block, bfq: print... |
1055 |
"bfq%s%c " fmt, pid_str, \ |
35fe6d763 block: use standa... |
1056 |
bfq_bfqq_sync((bfqq)) ? 'S' : 'A', ##args); \ |
ea25da480 block, bfq: split... |
1057 |
} while (0) |
35fe6d763 block: use standa... |
1058 1059 1060 1061 |
#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ blk_add_cgroup_trace_msg((bfqd)->queue, \ bfqg_to_blkg(bfqg)->blkcg, fmt, ##args); \ } while (0) |
ea25da480 block, bfq: split... |
1062 1063 |
#else /* CONFIG_BFQ_GROUP_IOSCHED */ |
1e66413c4 block, bfq: print... |
1064 1065 |
#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ char pid_str[MAX_PID_STR_LENGTH]; \ |
40d47c155 block,bfq: Skip t... |
1066 1067 |
if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \ break; \ |
1e66413c4 block, bfq: print... |
1068 1069 |
bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \ blk_add_trace_msg((bfqd)->queue, "bfq%s%c " fmt, pid_str, \ |
ea25da480 block, bfq: split... |
1070 |
bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ |
1e66413c4 block, bfq: print... |
1071 1072 |
##args); \ } while (0) |
ea25da480 block, bfq: split... |
1073 1074 1075 1076 1077 1078 1079 1080 |
#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) #endif /* CONFIG_BFQ_GROUP_IOSCHED */ #define bfq_log(bfqd, fmt, args...) \ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) #endif /* _BFQ_H */ |