Blame view

Documentation/block/ioprio.txt 4.6 KB
52a5e15f6   Jens Axboe   [PATCH] CFQ io sc...
1
2
3
4
5
6
7
8
  Block io priorities
  ===================
  
  
  Intro
  -----
  
  With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
a982ac06b   Matt LaPlante   misc doc and kcon...
9
10
11
12
  priorities are supported for reads on files.  This enables users to io nice
  processes or process groups, similar to what has been possible with cpu
  scheduling for ages.  This document mainly details the current possibilities
  with cfq; other io schedulers do not support io priorities thus far.
52a5e15f6   Jens Axboe   [PATCH] CFQ io sc...
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
  
  Scheduling classes
  ------------------
  
  CFQ implements three generic scheduling classes that determine how io is
  served for a process.
  
  IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
  higher priority than any other in the system; processes from this class are
  given first access to the disk every time. Thus it needs to be used with some
  care, as one io RT process can starve the entire system. Within the RT class,
  there are 8 levels of class data that determine exactly how much time this
  process needs the disk for on each service. In the future this might change
  to be more directly mappable to performance, by passing in a wanted data
  rate instead.
  
  IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
  for any process that hasn't set a specific io priority. The class data
  determines how much io bandwidth the process will get; it's directly mappable
  to the cpu nice levels, just more coarsely implemented. 0 is the highest
  BE prio level, 7 is the lowest. The mapping between cpu nice level and io
  nice level is determined as: io_nice = (cpu_nice + 20) / 5.
  
  IOPRIO_CLASS_IDLE: This is the idle scheduling class; processes running at this
  level only get io time when no one else needs the disk. The idle class has no
  class data, since it doesn't really apply here.
  
  Tools
  -----
  
  See below for a sample ionice tool. Usage:
  
  # ionice -c<class> -n<level> -p<pid>
  
  If pid isn't given, the current process is assumed. IO priority settings
  are inherited on fork, so you can use ionice to start the process at a given
  level:
  
  # ionice -c2 -n0 /bin/ls
  
  will run ls at the best-effort scheduling class at the highest priority.
  For a running process, you can give the pid instead:
  
  # ionice -c1 -n2 -p100
  
  will change pid 100 to run at the realtime scheduling class, at priority 2.
  
  ---> snip ionice.c tool <---
  
  #include <stdio.h>
  #include <stdlib.h>
  #include <errno.h>
  #include <getopt.h>
  #include <unistd.h>
  #include <sys/ptrace.h>
  #include <asm/unistd.h>
  
  /* Declared here but not referenced by the tool code in this file. */
  extern int sys_ioprio_set(int, int, int);
  extern int sys_ioprio_get(int, int);
  
  /*
   * Syscall numbers for ioprio_set/ioprio_get.
   *
   * Prefer whatever the system headers already define; the per-architecture
   * table below is only a fallback for headers that lack the ioprio
   * syscalls. This keeps the tool building on architectures that are not
   * listed here, as long as their headers provide the numbers.
   */
  #if !defined(__NR_ioprio_set) || !defined(__NR_ioprio_get)
  #undef __NR_ioprio_set
  #undef __NR_ioprio_get
  #if defined(__i386__)
  #define __NR_ioprio_set		289
  #define __NR_ioprio_get		290
  #elif defined(__ppc__) || defined(__powerpc__)
  #define __NR_ioprio_set		273
  #define __NR_ioprio_get		274
  #elif defined(__x86_64__)
  #define __NR_ioprio_set		251
  #define __NR_ioprio_get		252
  #elif defined(__ia64__)
  #define __NR_ioprio_set		1274
  #define __NR_ioprio_get		1275
  #else
  #error "Unsupported arch"
  #endif
  #endif
3317fedba   Dhaval Giani   Corrections in Do...
88
89
90
91
92
93
94
95
96
  /*
   * Issue the ioprio_set system call through syscall() and hand its
   * result back to the caller as an int.
   */
  static inline int ioprio_set(int which, int who, int ioprio)
  {
  	long rc = syscall(__NR_ioprio_set, which, who, ioprio);
  
  	return rc;
  }
  
  /*
   * Issue the ioprio_get system call through syscall() and hand its
   * result back to the caller as an int.
   */
  static inline int ioprio_get(int which, int who)
  {
  	long rc = syscall(__NR_ioprio_get, which, who);
  
  	return rc;
  }
52a5e15f6   Jens Axboe   [PATCH] CFQ io sc...
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
  
  /* Numeric io scheduling classes, as passed to -c and packed into an ioprio. */
  enum {
  	IOPRIO_CLASS_NONE = 0,	/* no class given */
  	IOPRIO_CLASS_RT   = 1,	/* realtime */
  	IOPRIO_CLASS_BE   = 2,	/* best-effort */
  	IOPRIO_CLASS_IDLE = 3,	/* idle */
  };
  
  /*
   * Values for the "which" argument of ioprio_set()/ioprio_get().
   * NOTE(review): PGRP and USER presumably select a process group and a
   * user respectively -- confirm against the syscall documentation.
   */
  enum {
  	IOPRIO_WHO_PROCESS = 1,
  	IOPRIO_WHO_PGRP    = 2,
  	IOPRIO_WHO_USER    = 3,
  };
  
  /* Bit position of the scheduling class inside a packed io priority value. */
  #define IOPRIO_CLASS_SHIFT	(13)
  
  /* Printable class names, indexed by the numeric scheduling class. */
  const char *to_prio[] = {
  	"none",
  	"realtime",
  	"best-effort",
  	"idle",
  };
  
  int main(int argc, char *argv[])
  {
  	int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
  	int c, pid = 0;
  
  	while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
  		switch (c) {
  		case 'n':
  			ioprio = strtol(optarg, NULL, 10);
  			set = 1;
  			break;
  		case 'c':
  			ioprio_class = strtol(optarg, NULL, 10);
  			set = 1;
  			break;
  		case 'p':
  			pid = strtol(optarg, NULL, 10);
  			break;
  		}
  	}
  
  	switch (ioprio_class) {
  		case IOPRIO_CLASS_NONE:
  			ioprio_class = IOPRIO_CLASS_BE;
  			break;
  		case IOPRIO_CLASS_RT:
  		case IOPRIO_CLASS_BE:
  			break;
  		case IOPRIO_CLASS_IDLE:
  			ioprio = 7;
  			break;
  		default:
  			printf("bad prio class %d
  ", ioprio_class);
  			return 1;
  	}
  
  	if (!set) {
  		if (!pid && argv[optind])
  			pid = strtol(argv[optind], NULL, 10);
  
  		ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
  
  		printf("pid=%d, %d
  ", pid, ioprio);
  
  		if (ioprio == -1)
  			perror("ioprio_get");
  		else {
  			ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
  			ioprio = ioprio & 0xff;
  			printf("%s: prio %d
  ", to_prio[ioprio_class], ioprio);
  		}
  	} else {
  		if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
  			perror("ioprio_set");
  			return 1;
  		}
  
  		if (argv[optind])
  			execvp(argv[optind], &argv[optind]);
  	}
  
  	return 0;
  }
  
  ---> snip ionice.c tool <---
26bbb29a2   Rob Landley   Update Jens Axboe...
183
  March 11 2005, Jens Axboe <jens.axboe@oracle.com>