Commit ce61cdc270a5e0dd18057bbf29bd3471abccbda8
1 parent: d7c9661115
Exists in smarc-imx_3.14.28_1.0.0_ga and in 1 other branch
tile: make __write_once a synonym for __read_mostly
This was really only useful for TILE64 when we mapped the kernel data with small pages. Now we use a huge page and we really don't want to map different parts of the kernel data in different ways.

We retain the __write_once name in case we want to bring it back to life at some point in the future.

Note that this change uncovered a latent bug where the "smp_topology" variable happened to always be aligned mod 8 so we could store two "int" values at once, but when we eliminated __write_once it ended up only aligned mod 4. Fix with an explicit annotation.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Showing 4 changed files with 17 additions and 27 deletions
arch/tile/include/asm/cache.h
@@ -49,10 +49,17 @@
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
 /*
- * Attribute for data that is kept read/write coherent until the end of
- * initialization, then bumped to read/only incoherent for performance.
+ * Originally we used small TLB pages for kernel data and grouped some
+ * things together as "write once", enforcing the property at the end
+ * of initialization by making those pages read-only and non-coherent.
+ * This allowed better cache utilization since cache inclusion did not
+ * need to be maintained.  However, to do this requires an extra TLB
+ * entry, which on balance is more of a performance hit than the
+ * non-coherence is a performance gain, so we now just make "read
+ * mostly" and "write once" be synonyms.  We keep the attribute
+ * separate in case we change our minds at a future date.
  */
-#define __write_once __attribute__((__section__(".w1data")))
+#define __write_once __read_mostly
 
 #endif /* _ASM_TILE_CACHE_H */
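For illustration, here is a minimal user-space sketch of what the macro above does: a section attribute groups a variable into a dedicated ELF section, and __write_once is now just another name for it. The my_* macro names and the boot_config variable are invented for this sketch; only the .data..read_mostly section name comes from the patch.

#include <stdio.h>

/* Mirror the kernel macros: place data in its own section, and make
 * the "write once" attribute a plain synonym, as the patch does. */
#define my_read_mostly __attribute__((__section__(".data..read_mostly")))
#define my_write_once  my_read_mostly

/* Written once during startup, read frequently afterwards. */
static int boot_config my_write_once = 42;

int main(void)
{
    printf("boot_config = %d\n", boot_config);   /* placed in .data..read_mostly */
    return 0;
}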
arch/tile/kernel/smp.c
@@ -22,7 +22,11 @@
 #include <asm/cacheflush.h>
 #include <asm/homecache.h>
 
-HV_Topology smp_topology __write_once;
+/*
+ * We write to width and height with a single store in head_NN.S,
+ * so make the variable aligned to "long".
+ */
+HV_Topology smp_topology __write_once __aligned(sizeof(long));
 EXPORT_SYMBOL(smp_topology);
 
 #if CHIP_HAS_IPI()
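The latent bug from the commit message can be shown with a small sketch. The struct and function below are illustrative stand-ins (mirroring HV_Topology's adjacent int width/height fields), not the real definitions: a single 8-byte store can only fill both 4-byte fields if the object is aligned mod 8, which the explicit __aligned(sizeof(long)) now guarantees instead of relying on section placement.

#include <stdint.h>

/* Stand-in for HV_Topology: two adjacent 32-bit fields. */
struct topo {
    int32_t width;
    int32_t height;
} __attribute__((aligned(sizeof(long))));   /* the fix: force mod-8 alignment */

static struct topo smp_topo;                /* stand-in for smp_topology */

/* C analogue of the single 64-bit store done in head_NN.S; it only
 * behaves because the object above is 8-byte aligned. */
void set_topology(uint64_t packed_width_height)
{
    *(volatile uint64_t *)&smp_topo = packed_width_height;
}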
arch/tile/kernel/vmlinux.lds.S
@@ -74,20 +74,8 @@
 __init_end = .;
 
 _sdata = .; /* Start of data section */
-
 RO_DATA_SECTION(PAGE_SIZE)
-
- /* initially writeable, then read-only */
- . = ALIGN(PAGE_SIZE);
- __w1data_begin = .;
- .w1data : AT(ADDR(.w1data) - LOAD_OFFSET) {
-   VMLINUX_SYMBOL(__w1data_begin) = .;
-   *(.w1data)
-   VMLINUX_SYMBOL(__w1data_end) = .;
- }
-
 RW_DATA_SECTION(L2_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
-
 _edata = .;
 
 EXCEPTION_TABLE(L2_CACHE_BYTES)
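With the .w1data output section gone, the only boundary symbols left for this region are the generic ones, such as __end_rodata emitted by RO_DATA_SECTION(). As a sketch of the general pattern, assuming the usual asm-generic/sections.h-style declarations, C code sees linker-script symbols as arrays whose addresses mark section boundaries; the helper below is hypothetical.

/* Section-boundary symbols provided by the linker script; only their
 * addresses are meaningful, so they are declared as arrays. */
extern char __end_rodata[];
extern char _sdata[], _edata[];

/* Hypothetical helper: is an address inside the read/write kernel data
 * that follows the read-only section? */
static inline int in_rw_kernel_data(unsigned long address)
{
    return address >= (unsigned long)__end_rodata &&
           address < (unsigned long)_edata;
}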
arch/tile/mm/init.c
@@ -271,21 +271,13 @@
 return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
 
 /*
- * Make the w1data homed like heap to start with, to avoid
- * making it part of the page-striped data area when we're just
- * going to convert it to read-only soon anyway.
- */
-if (address >= (ulong)__w1data_begin && address < (ulong)__w1data_end)
-	return construct_pgprot(PAGE_KERNEL, initial_heap_home());
-
-/*
  * Otherwise we just hand out consecutive cpus.  To avoid
  * requiring this function to hold state, we just walk forward from
  * _sdata by PAGE_SIZE, skipping the readonly and init data, to reach
  * the requested address, while walking cpu home around kdata_mask.
  * This is typically no more than a dozen or so iterations.
  */
-page = (((ulong)__w1data_end) + PAGE_SIZE - 1) & PAGE_MASK;
+page = (((ulong)__end_rodata) + PAGE_SIZE - 1) & PAGE_MASK;
 BUG_ON(address < page || address >= (ulong)_end);
 cpu = cpumask_first(&kdata_mask);
 for (; page < address; page += PAGE_SIZE) {
@@ -980,8 +972,7 @@
 const unsigned long text_delta = MEM_SV_START - PAGE_OFFSET;
 
 /*
- * Evict the dirty initdata on the boot cpu, evict the w1data
- * wherever it's homed, and evict all the init code everywhere.
+ * Evict the cache on all cores to avoid incoherence.
  * We are guaranteed that no one will touch the init pages any more.
  */
 homecache_evict(&cpu_cacheable_map);
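The page arithmetic in the first init.c hunk above is the standard round-up-to-a-page-boundary idiom. A small standalone sketch, with an assumed 4 KB page size purely for illustration:

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE 4096UL              /* assumed size for this sketch */
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* Round an address up to the next page boundary, as done for
 * __end_rodata before walking the kernel-data pages. */
static inline uintptr_t round_up_to_page(uintptr_t addr)
{
    return (addr + PAGE_SIZE - 1) & PAGE_MASK;
}

int main(void)
{
    assert(round_up_to_page(0x1000) == 0x1000);   /* already aligned */
    assert(round_up_to_page(0x1001) == 0x2000);   /* bumped to next page */
    return 0;
}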