Attachment 'linux-2.4.9-bigpages.patch'
--- linux/fs/proc/proc_misc.c.orig	Sun Feb 24 15:36:55 2002
+++ linux/fs/proc/proc_misc.c	Wed Feb 27 09:09:02 2002
@@ -179,7 +179,8 @@
 		"LowTotal: %8lu kB\n"
 		"LowFree: %8lu kB\n"
 		"SwapTotal: %8lu kB\n"
-		"SwapFree: %8lu kB\n",
+		"SwapFree: %8lu kB\n"
+		"BigPagesFree: %8lu kB\n",
 		K(i.totalram),
 		K(i.freeram),
 		K(i.sharedram),
@@ -195,7 +196,8 @@
 		K(i.totalram-i.totalhigh),
 		K(i.freeram-i.freehigh),
 		K(i.totalswap),
-		K(i.freeswap));
+		K(i.freeswap),
+		nr_bigpages << (PMD_SHIFT-10));

	return proc_calc_metrics(page, start, off, count, eof, len);
 #undef B
--- linux/fs/proc/array.c.orig	Sun Feb 24 19:40:00 2002
+++ linux/fs/proc/array.c	Tue Mar 5 23:02:48 2002
@@ -397,6 +397,13 @@

	if (pmd_none(*pmd))
		return;
+	if (pmd_bigpage(*pmd)) {
+		*total += BIGPAGE_PAGES;
+		*pages += BIGPAGE_PAGES;
+		*shared += BIGPAGE_PAGES;
+		*dirty += BIGPAGE_PAGES;
+		return;
+	}
	if (pmd_bad(*pmd)) {
		pmd_ERROR(*pmd);
		pmd_clear(pmd);
@@ -557,6 +564,12 @@
	str[1] = flags & VM_WRITE ? 'w' : '-';
	str[2] = flags & VM_EXEC ? 'x' : '-';
	str[3] = flags & VM_MAYSHARE ? 's' : 'p';
+	if (flags & VM_BIGPAGE)
+		str[3] = 'B';
+	else
+	if (map->vm_ops && (map->vm_ops->nopage == shmem_nopage) &&
+			I_BIGPAGE(map->vm_file->f_dentry->d_inode))
+		str[3] = 'b';
	str[4] = 0;

	dev = 0;
--- linux/fs/binfmt_elf.c.orig	Sun Feb 24 19:39:14 2002
+++ linux/fs/binfmt_elf.c	Wed Feb 27 09:03:34 2002
@@ -1212,12 +1212,12 @@
			if (pgd_none(*pgd))
				goto nextpage_coredump;
			pmd = pmd_offset(pgd, addr);
-			if (pmd_none(*pmd))
+			if (pmd_none(*pmd) || pmd_bigpage(*pmd))
				goto nextpage_coredump;
			pte = pte_offset_map(pmd, addr);
			none = pte_none(*pte);
			pte_unmap(pte);
-			if (pte_none(*pte)) {
+			if (none) {
 nextpage_coredump:
				DUMP_SEEK (file->f_pos + PAGE_SIZE);
			} else {
--- linux/kernel/ptrace.c.orig	Sun Feb 24 19:41:59 2002
+++ linux/kernel/ptrace.c	Sun Feb 24 20:26:07 2002
@@ -105,15 +105,24 @@
	pgmiddle = pmd_offset(pgdir, addr);
	if (pmd_none(*pgmiddle))
		goto fault_in_page;
-	if (pmd_bad(*pgmiddle))
-		goto bad_pmd;
-	pgtable = pte_offset_map(pgmiddle, addr);
-	if (!pte_present(*pgtable))
-		goto fault_in_page_unmap;
-	if (write && (!pte_write(*pgtable) || !pte_dirty(*pgtable)))
-		goto fault_in_page_unmap;
-	page = pte_page(*pgtable);
-	pte_unmap(pgtable);
+	if (pmd_bigpage(*pgmiddle)) {
+		unsigned long idx = (addr & BIGPAGE_MASK) / PAGE_SIZE;
+
+		page = pmd_page(*pgmiddle);
+		if (!BigPage(page))
+			BUG();
+		page += idx;
+	} else {
+		if (pmd_bad(*pgmiddle))
+			goto bad_pmd;
+		pgtable = pte_offset_map(pgmiddle, addr);
+		if (!pte_present(*pgtable))
+			goto fault_in_page_unmap;
+		if (write && (!pte_write(*pgtable) || !pte_dirty(*pgtable)))
+			goto fault_in_page_unmap;
+		page = pte_page(*pgtable);
+		pte_unmap(pgtable);
+	}

	/* ZERO_PAGE is special: reads from it are ok even though it's marked reserved */
	if (page != ZERO_PAGE(addr) || write) {
--- linux/kernel/fork.c.orig	Sun Feb 24 21:16:40 2002
+++ linux/kernel/fork.c	Wed Mar 6 20:49:40 2002
@@ -176,7 +176,8 @@
		if (!tmp)
			goto fail_nomem;
		*tmp = *mpnt;
-		tmp->vm_flags &= ~VM_LOCKED;
+		if (!(tmp->vm_flags & VM_BIGPAGE))
+			tmp->vm_flags &= ~VM_LOCKED;
		tmp->vm_mm = mm;
		tmp->vm_next = NULL;
		file = tmp->vm_file;
--- linux/kernel/sysctl.c.orig	Tue Feb 26 09:33:03 2002
+++ linux/kernel/sysctl.c	Tue Feb 26 10:08:18 2002
@@ -222,6 +222,8 @@
	 0444, NULL, &proc_dointvec},
	{KERN_RTSIGMAX, "rtsig-max", &max_queued_signals, sizeof(int),
	 0644, NULL, &proc_dointvec},
+	{KERN_SHMUSEBIGPAGES, "shm-use-bigpages", &shm_use_bigpages, sizeof(int),
+	 0644, NULL, &proc_dointvec},
 #ifdef CONFIG_SYSVIPC
	{KERN_SHMMAX, "shmmax", &shm_ctlmax, sizeof (size_t),
	 0644, NULL, &proc_doulongvec_minmax},
--- linux/mm/bigpages.c.orig	Tue Mar 5 17:50:34 2002
+++ linux/mm/bigpages.c	Sat Mar 2 11:54:40 2002
@@ -0,0 +1,96 @@
+/*
+ * linux/mm/bigpages.c
+ *
+ * Copyright (C) 2002 Ingo Molnar
+ */
+
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/spinlock.h>
+#include <linux/highmem.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+
+static spinlock_t bigpages_lock = SPIN_LOCK_UNLOCKED;
+long nr_bigpages;
+static LIST_HEAD(bigpages_list);
+
+#define ORDER_BIGPAGE	(PMD_SHIFT - PAGE_SHIFT)
+
+struct page *alloc_bigpage(void)
+{
+	list_t *head = &bigpages_list;
+	struct page *page = NULL;
+
+	spin_lock(&bigpages_lock);
+	if (nr_bigpages) {
+		page = list_entry(head->next, struct page, list);
+		list_del_init(head->next);
+		nr_bigpages--;
+	}
+	spin_unlock(&bigpages_lock);
+
+	return page;
+}
+
+void free_bigpage(struct page *page)
+{
+	struct page *p;
+	int i;
+
+	if ((page - mem_map) % BIGPAGE_PAGES)
+		BUG();
+	for (i = 0 ; i < (1 << ORDER_BIGPAGE); i++) {
+		p = page + i;
+		set_page_count(p, 2);
+		set_bit(PG_bigpage, &p->flags);
+		clear_highpage(p);
+	}
+	spin_lock(&bigpages_lock);
+	nr_bigpages++;
+	list_add(&page->list, &bigpages_list);
+	spin_unlock(&bigpages_lock);
+}
+
+static int grow_bigpages_pool(int pages)
+{
+	struct page *page;
+	int allocated = 0;
+
+	while (pages) {
+		page = alloc_pages(__GFP_HIGHMEM, ORDER_BIGPAGE);
+		if (!page)
+			break;
+		free_bigpage(page);
+		pages--;
+		allocated++;
+	}
+	printk("bigpage subsystem: allocated %ld bigpages (=%ldMB).\n",
+		nr_bigpages, nr_bigpages << (BIGPAGE_SHIFT - 20));
+	return allocated;
+}
+
+static __initdata int boot_bigpages;
+
+static __init int reserve_bigpages(char *str)
+{
+	unsigned long pages = memparse(str, &str) >> PAGE_SHIFT;
+
+	pages >>= ORDER_BIGPAGE;
+	boot_bigpages = pages;
+
+	return 0;
+}
+
+static __init int init_bigpage_pool(void)
+{
+	grow_bigpages_pool(boot_bigpages);
+	return 0;
+}
+
+__setup("bigpages=", reserve_bigpages);
+__initcall(init_bigpage_pool);
+
--- linux/mm/page_alloc.c.orig	Sun Feb 24 11:49:51 2002
+++ linux/mm/page_alloc.c	Wed Mar 6 20:52:15 2002
@@ -72,13 +72,8 @@
	per_cpu_t *per_cpu;
	zone_t *zone;

-	/*
-	 * This late check is safe because reserved pages do not
-	 * have a valid page->count. This trick avoids overhead
-	 * in __free_pages().
-	 */
-	if (PageReserved(page))
-		return;
+	if (BigPage(page))
+		BUG();
	if (page->buffers)
		BUG();
	if (page->mapping)
@@ -209,6 +204,8 @@
	__restore_flags(flags);

	set_page_count(page, 1);
+	if (BigPage(page))
+		BUG();
	return page;
 }

@@ -236,6 +233,8 @@
			if (BAD_RANGE(zone,page))
				BUG();
			DEBUG_ADD_PAGE
+			if (BigPage(page))
+				BUG();
			return page;
		}
		curr_order++;
@@ -594,6 +595,8 @@

 void __free_pages(struct page *page, unsigned long order)
 {
+	if (PageReserved(page))
+		return;
	if (put_page_testzero(page))
		__free_pages_ok(page, order);
 }
--- linux/mm/shmem.c.orig	Sun Feb 24 13:08:00 2002
+++ linux/mm/shmem.c	Wed Mar 6 20:55:39 2002
@@ -5,6 +5,7 @@
 *		2000 Transmeta Corp.
 *		2000-2001 Christoph Rohland
 *		2000-2001 SAP AG
+ *		2002 Ingo Molnar, Red Hat Inc.
 *
 * This file is released under the GPL.
 */
@@ -21,6 +22,7 @@
 #include <linux/devfs_fs_kernel.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
@@ -49,6 +51,8 @@
 static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
 atomic_t shmem_nrpages = ATOMIC_INIT(0);

+int shm_use_bigpages;
+
 #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)

 static void shmem_removepage(struct page *page)
@@ -76,19 +80,40 @@
 * It has to be called with the spinlock held.
 */

+static int shm_alloc_space(struct inode * inode, unsigned long space)
+{
+	struct shmem_sb_info * sbinfo = SHMEM_SB(inode->i_sb);
+
+	spin_lock(&sbinfo->stat_lock);
+	if (sbinfo->free_blocks < space) {
+		spin_unlock(&sbinfo->stat_lock);
+		return -ENOSPC;
+	}
+	sbinfo->free_blocks -= space;
+	inode->i_blocks += space*BLOCKS_PER_PAGE;
+	spin_unlock(&sbinfo->stat_lock);
+
+	return 0;
+}
+
+static void shm_free_space(struct inode * inode, unsigned long freed)
+{
+	struct shmem_sb_info * sbinfo = SHMEM_SB(inode->i_sb);
+
+	spin_lock(&sbinfo->stat_lock);
+	sbinfo->free_blocks += freed;
+	inode->i_blocks -= freed*BLOCKS_PER_PAGE;
+	spin_unlock(&sbinfo->stat_lock);
+}
+
 static void shmem_recalc_inode(struct inode * inode)
 {
	unsigned long freed;

	freed = (inode->i_blocks/BLOCKS_PER_PAGE) -
		(inode->i_mapping->nrpages + SHMEM_I(inode)->swapped);
-	if (freed){
-		struct shmem_sb_info * sbinfo = SHMEM_SB(inode->i_sb);
-		inode->i_blocks -= freed*BLOCKS_PER_PAGE;
-		spin_lock (&sbinfo->stat_lock);
-		sbinfo->free_blocks += freed;
-		spin_unlock (&sbinfo->stat_lock);
-	}
+	if (freed)
+		shm_free_space(inode, freed);
 }

 /*
@@ -134,7 +159,7 @@

 #define SHMEM_MAX_BLOCKS (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE * ENTRIES_PER_PAGE/2*(ENTRIES_PER_PAGE+1))

-static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long index, unsigned long page)
+static swp_entry_t * shmem_swp_entry (shmem_info_t *info, unsigned long index, unsigned long page)
 {
	unsigned long offset;
	void **dir;
@@ -181,7 +206,7 @@
 * @info:	info structure for the inode
 * @index:	index of the page to find
 */
-static inline swp_entry_t * shmem_alloc_entry (struct shmem_inode_info *info, unsigned long index)
+static inline swp_entry_t * shmem_alloc_entry (shmem_info_t *info, unsigned long index)
 {
	unsigned long page = 0;
	swp_entry_t * res;
@@ -290,7 +315,7 @@
 * then shmem_truncate_direct to do the real work
 */
 static inline unsigned long
-shmem_truncate_indirect(struct shmem_inode_info *info, unsigned long index)
+shmem_truncate_indirect(shmem_info_t *info, unsigned long index)
 {
	swp_entry_t ***base;
	unsigned long baseidx, len, start;
@@ -328,40 +353,61 @@
	return shmem_truncate_direct(base, start, len);
 }

-static void shmem_truncate (struct inode * inode)
+static void shmem_truncate(struct inode *inode)
 {
-	unsigned long index;
-	unsigned long freed = 0;
-	struct shmem_inode_info * info = SHMEM_I(inode);
+	shmem_info_t * info = SHMEM_I(inode);
+	unsigned long freed = 0, index, i;
+	struct page *page;

	down(&info->sem);
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-	spin_lock (&info->lock);
-	index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	spin_lock(&info->lock);
+	if (!info->bigpages) {
+		index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

-	while (index < info->next_index)
-		freed += shmem_truncate_indirect(info, index);
+		while (index < info->next_index)
+			freed += shmem_truncate_indirect(info, index);

-	info->swapped -= freed;
-	shmem_recalc_inode(inode);
-	spin_unlock (&info->lock);
+		info->swapped -= freed;
+		shmem_recalc_inode(inode);
+	} else {
+		index = (inode->i_size + BIGPAGE_SIZE - 1) >> BIGPAGE_SHIFT;
+
+		for (i = index; i < info->max_bigpages; i++) {
+			page = info->bigpages[i];
+			if (page) {
+				info->bigpages[i] = NULL;
+				free_bigpage(page);
+				shm_free_space(inode, BIGPAGE_PAGES);
+			}
+		}
+	}
+	spin_unlock(&info->lock);
	up(&info->sem);
 }

 static void shmem_delete_inode(struct inode * inode)
 {
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+	shmem_info_t *info = SHMEM_I(inode);

	inode->i_size = 0;
-	if (inode->i_op->truncate == shmem_truncate){
-		spin_lock (&shmem_ilock);
-		list_del (&SHMEM_I(inode)->list);
-		spin_unlock (&shmem_ilock);
-		shmem_truncate (inode);
+	if (inode->i_op->truncate == shmem_truncate) {
+		spin_lock(&shmem_ilock);
+		list_del(&info->list);
+		spin_unlock(&shmem_ilock);
+		shmem_truncate(inode);
+		if (info->bigpages) {
+			kfree(info->bigpages);
+			info->bigpages = NULL;
+			info->max_bigpages = 0;
+		}
	}
-	spin_lock (&sbinfo->stat_lock);
+	if (info->bigpages)
+		BUG();
+	spin_lock(&sbinfo->stat_lock);
	sbinfo->free_inodes++;
-	spin_unlock (&sbinfo->stat_lock);
+	spin_unlock(&sbinfo->stat_lock);
	clear_inode(inode);
 }

@@ -378,14 +424,14 @@
	return -1;
 }

-static int shmem_unuse_inode (struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
+static int shmem_unuse_inode (shmem_info_t *info, swp_entry_t entry, struct page *page)
 {
	swp_entry_t *ptr;
	unsigned long idx;
	int offset;

	idx = 0;
-	spin_lock (&info->lock);
+	spin_lock(&info->lock);
	offset = shmem_clear_swp (entry, info->i_direct, SHMEM_NR_DIRECT);
	if (offset >= 0)
		goto found;
@@ -399,7 +445,7 @@
		if (offset >= 0)
			goto found;
	}
-	spin_unlock (&info->lock);
+	spin_unlock(&info->lock);
	return 0;
 found:
	add_to_page_cache(page, info->inode->i_mapping, offset + idx);
@@ -418,16 +464,18 @@
 void shmem_unuse(swp_entry_t entry, struct page *page)
 {
	struct list_head *p;
-	struct shmem_inode_info * info;
+	shmem_info_t * info;

-	spin_lock (&shmem_ilock);
+	if (BigPage(page))
+		BUG();
+	spin_lock(&shmem_ilock);
	list_for_each(p, &shmem_inodes) {
-		info = list_entry(p, struct shmem_inode_info, list);
+		info = list_entry(p, shmem_info_t, list);

		if (shmem_unuse_inode(info, entry, page))
			break;
	}
-	spin_unlock (&shmem_ilock);
+	spin_unlock(&shmem_ilock);
 }

 /*
@@ -440,12 +488,14 @@
 static int shmem_writepage(struct page * page)
 {
	int error;
-	struct shmem_inode_info *info;
+	shmem_info_t *info;
	swp_entry_t *entry, swap;
	struct inode *inode;

	if (!PageLocked(page))
		BUG();
+	if (BigPage(page))
+		BUG();

	inode = page->mapping->host;
	info = SHMEM_I(inode);
@@ -494,13 +544,14 @@
 * still need to guard against racing with shm_writepage(), which might
 * be trying to move the page to the swap cache as we run.
 */
-static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct inode * inode, unsigned long idx)
+static struct page * shmem_getpage_locked(shmem_info_t *info, struct inode * inode, unsigned long idx)
 {
	struct address_space * mapping = inode->i_mapping;
-	struct shmem_sb_info *sbinfo;
	struct page * page;
	swp_entry_t *entry;

+	if (info->bigpages)
+		BUG();
 repeat:
	page = find_lock_page(mapping, idx);
	if (page)
@@ -510,7 +561,7 @@
	if (IS_ERR(entry))
		return (void *)entry;

-	spin_lock (&info->lock);
+	spin_lock(&info->lock);

	/* The shmem_alloc_entry() call may have blocked, and
	 * shmem_writepage may have been moving a page between the page
@@ -521,7 +572,7 @@
	if (page) {
		if (TryLockPage(page))
			goto wait_retry;
-		spin_unlock (&info->lock);
+		spin_unlock(&info->lock);
		return page;
	}
@@ -533,7 +584,7 @@
		page = __find_get_page(&swapper_space, entry->val,
			page_hash(&swapper_space, entry->val));
		if (!page) {
-			spin_unlock (&info->lock);
+			spin_unlock(&info->lock);
			lock_kernel();
			swapin_readahead(*entry);
			page = read_swap_cache_async(*entry);
@@ -563,15 +614,12 @@
		page->flags = flags | (1 << PG_dirty);
		add_to_page_cache_locked(page, mapping, idx);
		info->swapped--;
-		spin_unlock (&info->lock);
+		spin_unlock(&info->lock);
	} else {
-		sbinfo = SHMEM_SB(inode->i_sb);
-		spin_unlock (&info->lock);
-		spin_lock (&sbinfo->stat_lock);
-		if (sbinfo->free_blocks == 0)
-			goto no_space;
-		sbinfo->free_blocks--;
-		spin_unlock (&sbinfo->stat_lock);
+		spin_unlock(&info->lock);
+
+		if (shm_alloc_space(inode, 1))
+			return ERR_PTR(-ENOSPC);

		/* Ok, get a new page. We don't have to worry about the
		 * info->lock spinlock here: we cannot race against
@@ -581,10 +629,11 @@
		 * new shm entry. The inode semaphore we already hold
		 * is enough to make this atomic. */
		page = page_cache_alloc(mapping);
-		if (!page)
+		if (!page) {
+			shm_free_space(inode, 1);
			return ERR_PTR(-ENOMEM);
+		}
		clear_highpage(page);
-		inode->i_blocks += BLOCKS_PER_PAGE;
		add_to_page_cache (page, mapping, idx);
	}

@@ -594,23 +643,63 @@
	if (info->locked)
		page_cache_get(page);
	return page;
-no_space:
-	spin_unlock (&sbinfo->stat_lock);
-	return ERR_PTR(-ENOSPC);

 wait_retry:
-	spin_unlock (&info->lock);
+	spin_unlock(&info->lock);
	wait_on_page(page);
	page_cache_release(page);
	goto repeat;
 }

+static struct page * shmem_getbigpage_locked(shmem_info_t *info, struct inode * inode, unsigned long idx)
+{
+	unsigned long bigidx, offset;
+	struct page *page;
+
+	bigidx = idx / BIGPAGE_PAGES;
+	offset = idx % BIGPAGE_PAGES;
+
+	if (bigidx >= info->max_bigpages)
+		return ERR_PTR(-ENOSPC);
+got_bigpage:
+	page = info->bigpages[bigidx];
+	if (page) {
+		page += offset;
+		get_page(page);
+		if (!BigPage(page))
+			BUG();
+		lock_page(page);
+		return page;
+	}
+
+	if (shm_alloc_space(inode, BIGPAGE_PAGES))
+		return ERR_PTR(-ENOSPC);
+
+	page = alloc_bigpage();
+	if (!page) {
+		shm_free_space(inode, BIGPAGE_PAGES);
+		return ERR_PTR(-ENOSPC);
+	}
+
+	spin_lock(&info->lock);
+	if (info->bigpages[bigidx]) {
+		spin_unlock(&info->lock);
+		free_bigpage(page);
+		shm_free_space(inode, BIGPAGE_PAGES);
+		goto got_bigpage;
+	}
+	info->bigpages[bigidx] = page;
+	spin_unlock(&info->lock);
+
+	goto got_bigpage;
+}
+
 static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
 {
-	struct shmem_inode_info *info = SHMEM_I(inode);
+	shmem_info_t *info = SHMEM_I(inode);
	int error;

-	down (&info->sem);
+	down(&info->sem);
	*ptr = ERR_PTR(-EFAULT);
	if (inode->i_size <= (loff_t) idx * PAGE_CACHE_SIZE)
		goto failed;
@@ -620,10 +709,36 @@
		goto failed;

	UnlockPage(*ptr);
-	up (&info->sem);
+	up(&info->sem);
+	return 0;
+failed:
+	up(&info->sem);
+	error = PTR_ERR(*ptr);
+	*ptr = NOPAGE_SIGBUS;
+	if (error == -ENOMEM)
+		*ptr = NOPAGE_OOM;
+	return error;
+}
+
+static int shmem_getbigpage(struct inode * inode, unsigned long idx, struct page **ptr)
+{
+	shmem_info_t *info = SHMEM_I(inode);
+	int error;
+
+	down(&info->sem);
+	*ptr = ERR_PTR(-EFAULT);
+	if (inode->i_size <= (loff_t) idx * PAGE_SIZE)
+		goto failed;
+
+	*ptr = shmem_getbigpage_locked(info, inode, idx);
+	if (IS_ERR (*ptr))
+		goto failed;
+
+	UnlockPage(*ptr);
+	up(&info->sem);
	return 0;
 failed:
-	up (&info->sem);
+	up(&info->sem);
	error = PTR_ERR(*ptr);
	*ptr = NOPAGE_SIGBUS;
	if (error == -ENOMEM)
@@ -633,10 +748,31 @@

 struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int no_share)
 {
-	struct page * page;
-	unsigned int idx;
	struct inode * inode = vma->vm_file->f_dentry->d_inode;
+	int bigpage = vma->vm_flags & VM_BIGPAGE;
+	unsigned long idx, bigidx;
+	struct page * page = NULL;
+
+	if (I_BIGPAGE(inode)) {
+		if (no_share)
+			BUG();

+		idx = (address - vma->vm_start) >> PAGE_SHIFT;
+		idx += vma->vm_pgoff;
+
+		if (shmem_getbigpage(inode, idx, &page))
+			return page;
+
+		if (bigpage) {
+			put_page(page);
+			bigidx = idx / BIGPAGE_PAGES;
+			if (bigidx >= SHMEM_I(inode)->max_bigpages)
+				BUG();
+			page = SHMEM_I(inode)->bigpages[bigidx];
+			get_page(page);
+		}
+		return page;
+	}
	idx = (address - vma->vm_start) >> PAGE_CACHE_SHIFT;
	idx += vma->vm_pgoff;

@@ -662,7 +798,7 @@
 void shmem_lock(struct file * file, int lock)
 {
	struct inode * inode = file->f_dentry->d_inode;
-	struct shmem_inode_info * info = SHMEM_I(inode);
+	shmem_info_t * info = SHMEM_I(inode);
	struct page * page;
	unsigned long idx, size;

@@ -670,6 +806,8 @@
	if (info->locked == lock)
		goto out;
	info->locked = lock;
+	if (SHMEM_I(inode)->bigpages)
+		goto out;
	size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	for (idx = 0; idx < size; idx++) {
		page = find_lock_page(inode->i_mapping, idx);
@@ -686,32 +824,97 @@
	up(&info->sem);
 }

+
+int shmem_make_bigpage_mmap(struct file * file, struct vm_area_struct * vma)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	unsigned long pages;
+	shmem_info_t *info;
+	int bigpage;
+
+	/*
+	 * COW of 4MB/2MB pages is ... an interesting concept. Disallow it.
+	 */
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	info = SHMEM_I(inode);
+	/*
+	 * Make sure the bigpage area is properly aligned and
+	 * properly sized, both on the virtual and on the
+	 * physical side.
+	 */
+	bigpage = 0;
+	if ((vma->vm_flags & VM_BIGMAP) || shm_use_bigpages)
+		bigpage = 1;
+	if (vma->vm_start & BIGPAGE_MASK)
+		bigpage = 0;
+	if (vma->vm_end & BIGPAGE_MASK)
+		bigpage = 0;
+	if (vma->vm_pgoff % BIGPAGE_PAGES)
+		bigpage = 0;
+	if (!bigpage && (vma->vm_flags & VM_BIGMAP))
+		return -EINVAL;
+
+	pages = (vma->vm_end - vma->vm_start) / PAGE_SIZE + vma->vm_pgoff;
+	pages >>= (BIGPAGE_SHIFT - PAGE_SHIFT);
+	if (pages > info->max_bigpages)
+		return -ENOSPC;
+
+	vma->vm_flags |= VM_LOCKED;
+	if (bigpage)
+		vma->vm_flags |= VM_BIGPAGE;
+	return 0;
+}
+
 static int shmem_mmap(struct file * file, struct vm_area_struct * vma)
 {
-	struct vm_operations_struct * ops;
+	struct vm_operations_struct * ops = &shmem_vm_ops;
	struct inode *inode = file->f_dentry->d_inode;

-	ops = &shmem_vm_ops;
	if (!inode->i_sb || !S_ISREG(inode->i_mode))
		return -EACCES;
+	if (I_BIGPAGE(inode)) {
+		int error = shmem_make_bigpage_mmap(file, vma);
+		if (error)
+			return error;
+	}
	UPDATE_ATIME(inode);
	vma->vm_ops = ops;
+	vma->vm_flags &= ~VM_BIGMAP;
+	return 0;
+}
+
+int shmem_munmap(struct vm_area_struct * vma, unsigned long addr, size_t size)
+{
+	int bigpage = vma->vm_flags & VM_BIGPAGE;
+
+	/*
+	 * Make sure the unmapped bigpage area is properly aligned and
+	 * properly sized:
+	 */
+	if (bigpage) {
+		if (addr & BIGPAGE_MASK)
+			return -EINVAL;
+		if (size & BIGPAGE_MASK)
+			return -EINVAL;
+	}
	return 0;
 }

 struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
 {
	struct inode * inode;
-	struct shmem_inode_info *info;
+	shmem_info_t *info;
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);

-	spin_lock (&sbinfo->stat_lock);
+	spin_lock(&sbinfo->stat_lock);
	if (!sbinfo->free_inodes) {
-		spin_unlock (&sbinfo->stat_lock);
+		spin_unlock(&sbinfo->stat_lock);
		return NULL;
	}
	sbinfo->free_inodes--;
-	spin_unlock (&sbinfo->stat_lock);
+	spin_unlock(&sbinfo->stat_lock);

	inode = new_inode(sb);
	if (inode) {
@@ -727,6 +930,8 @@
		info->inode = inode;
		spin_lock_init (&info->lock);
		sema_init (&info->sem, 1);
+		if (info->bigpages)
+			BUG();
		switch (mode & S_IFMT) {
		default:
			init_special_inode(inode, mode, dev);
@@ -734,9 +939,9 @@
		case S_IFREG:
			inode->i_op = &shmem_inode_operations;
			inode->i_fop = &shmem_file_operations;
-			spin_lock (&shmem_ilock);
-			list_add (&SHMEM_I(inode)->list, &shmem_inodes);
-			spin_unlock (&shmem_ilock);
+			spin_lock(&shmem_ilock);
+			list_add(&SHMEM_I(inode)->list, &shmem_inodes);
+			spin_unlock(&shmem_ilock);
			break;
		case S_IFDIR:
			inode->i_nlink++;
@@ -783,14 +988,23 @@
 shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
 {
	struct inode *inode = file->f_dentry->d_inode;
-	struct shmem_inode_info *info;
+	shmem_info_t *info = SHMEM_I(inode);
	unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
+	unsigned int page_size, page_shift;
	loff_t pos;
	struct page *page;
	unsigned long written;
	long status;
	int err;

+	if (I_BIGPAGE(inode)) {
+		page_size = PAGE_SIZE;
+		page_shift = PAGE_SHIFT;
+	} else {
+		page_size = PAGE_CACHE_SIZE;
+		page_shift = PAGE_CACHE_SHIFT;
+	}
+
	if ((ssize_t) count < 0)
		return -EINVAL;

@@ -845,9 +1059,9 @@
		 * Try to find the page in the cache. If it isn't there,
		 * allocate a free page.
		 */
-		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
-		index = pos >> PAGE_CACHE_SHIFT;
-		bytes = PAGE_CACHE_SIZE - offset;
+		offset = (pos & (page_size -1)); /* Within page */
+		index = pos >> page_shift;
+		bytes = page_size - offset;
		if (bytes > count) {
			bytes = count;
			deactivate = 0;
@@ -859,24 +1073,26 @@
		 * same page as we're writing to, without it being marked
		 * up-to-date.
		 */
-		{ volatile unsigned char dummy;
+		{
+			volatile unsigned char dummy;
			__get_user(dummy, buf);
			__get_user(dummy, buf+bytes-1);
		}

-		info = SHMEM_I(inode);
-		down (&info->sem);
-		page = shmem_getpage_locked(info, inode, index);
-		up (&info->sem);
+		down(&info->sem);
+		if (I_BIGPAGE(inode))
+			page = shmem_getbigpage_locked(info, inode, index);
+		else
+			page = shmem_getpage_locked(info, inode, index);
+		up(&info->sem);

		status = PTR_ERR(page);
		if (IS_ERR(page))
			break;

		/* We have exclusive IO access to the page.. */
-		if (!PageLocked(page)) {
+		if (!PageLocked(page))
			PAGE_BUG(page);
-		}

		kaddr = kmap(page);
		status = copy_from_user(kaddr+offset, buf, bytes);
@@ -899,7 +1115,10 @@
		UnlockPage(page);
		if (deactivate)
			deactivate_page(page);
-		page_cache_release(page);
+		if (I_BIGPAGE(inode))
+			__free_page(page);
+		else
+			page_cache_release(page);

		if (status < 0)
			break;
@@ -921,29 +1140,43 @@
 {
	struct inode *inode = filp->f_dentry->d_inode;
	struct address_space *mapping = inode->i_mapping;
+	unsigned int page_size, page_shift, page_mask;
	unsigned long index, offset;
	int nr = 1;

-	index = *ppos >> PAGE_CACHE_SHIFT;
-	offset = *ppos & ~PAGE_CACHE_MASK;
+	if (I_BIGPAGE(inode)) {
+		page_size = PAGE_SIZE;
+		page_shift = PAGE_SHIFT;
+		page_mask = PAGE_MASK;
+	} else {
+		page_size = PAGE_CACHE_SIZE;
+		page_shift = PAGE_CACHE_SHIFT;
+		page_mask = PAGE_CACHE_MASK;
+	}
+	index = *ppos >> page_shift;
+	offset = *ppos & ~page_mask;

	while (nr && desc->count) {
		struct page *page;
		unsigned long end_index, nr;

-		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+		end_index = inode->i_size >> page_shift;
		if (index > end_index)
			break;
-		nr = PAGE_CACHE_SIZE;
+		nr = page_size;
		if (index == end_index) {
-			nr = inode->i_size & ~PAGE_CACHE_MASK;
+			nr = inode->i_size & ~page_mask;
			if (nr <= offset)
				break;
		}

		nr = nr - offset;

-		if ((desc->error = shmem_getpage(inode, index, &page)))
+		if (I_BIGPAGE(inode))
+			desc->error = shmem_getbigpage(inode, index, &page);
+		else
+			desc->error = shmem_getpage(inode, index, &page);
+		if (desc->error)
			break;

		if (mapping->i_mmap_shared != NULL)
@@ -961,13 +1194,16 @@
		 */
		nr = file_read_actor(desc, page, offset, nr);
		offset += nr;
-		index += offset >> PAGE_CACHE_SHIFT;
-		offset &= ~PAGE_CACHE_MASK;
-
-		page_cache_release(page);
+		index += offset >> page_shift;
+		offset &= ~page_mask;
+
+		if (I_BIGPAGE(inode))
+			__free_page(page);
+		else
+			page_cache_release(page);
	}

-	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
+	*ppos = ((loff_t) index << page_shift) + offset;
	UPDATE_ATIME(inode);
 }

@@ -1002,12 +1238,12 @@

	buf->f_type = TMPFS_MAGIC;
	buf->f_bsize = PAGE_CACHE_SIZE;
-	spin_lock (&sbinfo->stat_lock);
+	spin_lock(&sbinfo->stat_lock);
	buf->f_blocks = sbinfo->max_blocks;
	buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
	buf->f_files = sbinfo->max_inodes;
	buf->f_ffree = sbinfo->free_inodes;
-	spin_unlock (&sbinfo->stat_lock);
+	spin_unlock(&sbinfo->stat_lock);
	buf->f_namelen = 255;
	return 0;
 }
@@ -1035,6 +1271,8 @@
		d_instantiate(dentry, inode);
		dget(dentry); /* Extra count - pin the dentry in core */
		error = 0;
+		if (shm_use_bigpages > 1)
+			shm_enable_bigpages(dentry->d_inode, NULL, BIGPAGE_SIZE, 0);
	}
	return error;
 }
@@ -1153,7 +1391,7 @@
	struct inode *inode;
	struct page *page;
	char *kaddr;
-	struct shmem_inode_info * info;
+	shmem_info_t * info;

	error = shmem_mknod(dir, dentry, S_IFLNK | S_IRWXUGO, 0);
	if (error)
@@ -1166,14 +1404,14 @@
	inode = dentry->d_inode;
	info = SHMEM_I(inode);
	inode->i_size = len;
-	if (len <= sizeof(struct shmem_inode_info)) {
+	if (len <= sizeof(shmem_info_t)) {
		/* do it inline */
		memcpy(info, symname, len);
		inode->i_op = &shmem_symlink_inline_operations;
	} else {
-		spin_lock (&shmem_ilock);
-		list_add (&info->list, &shmem_inodes);
-		spin_unlock (&shmem_ilock);
+		spin_lock(&shmem_ilock);
+		list_add(&info->list, &shmem_inodes);
+		spin_unlock(&shmem_ilock);
		down(&info->sem);
		page = shmem_getpage_locked(info, inode, 0);
		if (IS_ERR(page)) {
@@ -1299,6 +1537,7 @@
 {
	return 0;
 }
+
 #endif

 static struct super_block *shmem_read_super(struct super_block * sb, void * data, int silent)
@@ -1347,7 +1586,70 @@
	return sb;
 }

+/*
+ * Limit kmalloc() size.
+ */
+#define MAX_BIGPAGES (16000/sizeof(void *))
+
+void shm_enable_bigpages(struct inode *inode, struct file *filp, size_t size, int prealloc)
+{
+	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+	shmem_info_t * info = SHMEM_I(inode);
+	unsigned long pages, i, j;
+
+	if (!S_ISREG(inode->i_mode))
+		return;
+	if ((size < 1 /*BIGPAGE_SIZE*/) || info->bigpages || !nr_bigpages)
+		return;
+	info->max_bigpages = sbinfo->max_blocks >> (BIGPAGE_SHIFT - PAGE_CACHE_SHIFT);
+	if (info->max_bigpages > MAX_BIGPAGES)
+		info->max_bigpages = MAX_BIGPAGES;
+	if (!info->max_bigpages)
+		return;
+	info->bigpages = (struct page **) kmalloc(info->max_bigpages * sizeof(struct page *), GFP_KERNEL);
+	if (!info->bigpages)
+		return;
+	memset(info->bigpages, 0, info->max_bigpages * sizeof(struct page *));
+	if (prealloc) {
+		pages = size / BIGPAGE_SIZE;
+		for (i = 0; i < pages; i++) {
+			if (!shm_alloc_space(inode, BIGPAGE_PAGES)) {
+				info->bigpages[i] = alloc_bigpage();
+				if (!info->bigpages[i])
+					shm_free_space(inode, BIGPAGE_PAGES);
+			}
+			if (!info->bigpages[i]) {
+				for (j = 0; j < i; j++) {
+					shm_free_space(inode, BIGPAGE_PAGES);
+					free_bigpage(info->bigpages[j]);
+					info->bigpages[j] = NULL;
+				}
+				kfree(info->bigpages);
+				info->max_bigpages = 0;
+				info->bigpages = NULL;
+				return;
+			}
+		}
+	}
+}

+static int shmem_ioctl(struct inode * inode, struct file * filp,
+			unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case SHMEM_IOC_SETBIGPAGES:
+		if (!HAVE_ARCH_BIGPAGES || !nr_bigpages)
+			return -EINVAL;
+		if (inode->i_size || SHMEM_I(inode)->swapped)
+			return -EINVAL;
+		down(&inode->i_sem);
+		shm_enable_bigpages(inode, filp, BIGPAGE_SIZE, 0);
+		up(&inode->i_sem);
+		break;
+	default:
+	}
+	return 0;
+}

 static struct address_space_operations shmem_aops = {
	removepage:	shmem_removepage,
@@ -1356,10 +1658,12 @@

 static struct file_operations shmem_file_operations = {
	mmap:		shmem_mmap,
+	munmap:		shmem_munmap,
 #ifdef CONFIG_TMPFS
	read:		shmem_file_read,
	write:		shmem_file_write,
	fsync:		shmem_sync_file,
+	ioctl:		shmem_ioctl,
 #endif
 };

@@ -1523,6 +1827,8 @@
	file = shmem_file_setup("dev/zero", size);
	if (IS_ERR(file))
		return PTR_ERR(file);
+	if (shm_use_bigpages)
+		shm_enable_bigpages(file->f_dentry->d_inode, file, BIGPAGE_SIZE, 0);

	if (vma->vm_file)
		fput (vma->vm_file);
--- linux/mm/mlock.c.orig	Sun Feb 24 21:18:24 2002
+++ linux/mm/mlock.c	Sun Feb 24 21:18:51 2002
@@ -162,6 +162,8 @@
	vma = find_vma(current->mm, start);
	if (!vma || vma->vm_start > start)
		return -ENOMEM;
+	if (vma->vm_flags & VM_BIGPAGE)
+		return -EINVAL;

	for (nstart = start ; ; ) {
		unsigned int newflags;
--- linux/mm/mmap.c.orig	Sun Feb 24 17:10:07 2002
+++ linux/mm/mmap.c	Wed Mar 6 20:47:33 2002
@@ -199,6 +199,7 @@
	flag_bits =
		_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
		_trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
+		_trans(flags, MAP_BIGPAGE, VM_BIGMAP) |
		_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
	return prot_bits | flag_bits;
 #undef _trans
@@ -329,7 +330,7 @@
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = vm_flags;
-	vma->vm_page_prot = protection_map[vm_flags & 0x0f];
+	vma->vm_page_prot = protection_map[vm_flags & MAP_TYPE];
	vma->vm_ops = NULL;
	vma->vm_pgoff = pgoff;
	vma->vm_file = NULL;
@@ -348,9 +349,18 @@
		error = file->f_op->mmap(file, vma);
		if (error)
			goto unmap_and_free_vma;
-	} else if (flags & MAP_SHARED) {
-		error = shmem_zero_setup(vma);
-		if (error)
+		if (vma->vm_flags & VM_BIGMAP) {
+			error = -EINVAL;
+			goto unmap_and_free_vma;
+		}
+	} else {
+		if (flags & MAP_SHARED) {
+			error = shmem_zero_setup(vma);
+			if (error)
+				goto free_vma;
+		}
+		error = -EINVAL;
+		if (vma->vm_flags & VM_BIGMAP)
			goto free_vma;
	}

@@ -751,6 +761,10 @@
	if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len)
	    && mm->map_count >= max_map_count)
		return -ENOMEM;
+	if (mpnt->vm_file && mpnt->vm_file->f_op &&
+			mpnt->vm_file->f_op->munmap)
+		if (mpnt->vm_file->f_op->munmap(mpnt, addr, len))
+			return 0; // FIXME: -EINVAL;

	/*
	 * We may need one additional vma to fix up the mappings ...
@@ -1056,8 +1070,13 @@
		mm->map_count--;
		__remove_shared_vm_struct(next);
		spin_unlock(&mm->page_table_lock);
+		if (next->vm_ops && next->vm_ops->close)
+			next->vm_ops->close(vma);
		unlock_vma_mappings(vma);
-
+		if (next->vm_file) {
+			fput(next->vm_file);
+			next->vm_file = NULL;
+		}
		kmem_cache_free(vm_area_cachep, next);
	}

--- linux/mm/memory.c.orig	Sun Feb 24 18:05:08 2002
+++ linux/mm/memory.c	Thu Mar 7 03:23:03 2002
@@ -82,6 +82,8 @@

	if (pmd_none(*dir))
		return;
+	if (pmd_bigpage(*dir))
+		return;
	if (pmd_bad(*dir)) {
		pmd_ERROR(*dir);
		pmd_clear(dir);
@@ -179,6 +181,7 @@
	unsigned long address = vma->vm_start;
	unsigned long end = vma->vm_end;
	unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+	int bigpage = (vma->vm_flags & VM_BIGPAGE);

	src_pgd = pgd_offset(src, address)-1;
	dst_pgd = pgd_offset(dst, address)-1;
@@ -213,6 +216,18 @@

		if (pmd_none(*src_pmd))
			goto skip_copy_pte_range;
+		if (bigpage) {
+			if (!pmd_bigpage(*src_pmd))
+				pmd_clear(dst_pmd);
+			else
+				*dst_pmd = *src_pmd;
+			address += PMD_SIZE;
+			if (address >= end)
+				goto out;
+			goto cont_copy_pmd_range;
+		}
+		if (pmd_bigpage(*src_pmd))
+			BUG();
		if (pmd_bad(*src_pmd)) {
			pmd_ERROR(*src_pmd);
			pmd_clear(src_pmd);
@@ -297,11 +312,13 @@
 static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
 {
	unsigned long offset;
-	pte_t *mapped, *ptep;
	int freed = 0;
+	pte_t *ptep;

	if (pmd_none(*pmd))
		return 0;
+	if (pmd_bigpage(*pmd))
+		BUG();
	if (pmd_bad(*pmd)) {
		pmd_ERROR(*pmd);
		pmd_clear(pmd);
@@ -351,7 +368,10 @@
		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
	freed = 0;
	do {
-		freed += zap_pte_range(tlb, pmd, address, end - address);
+		if (pmd_bigpage(*pmd))
+			pmd_clear(pmd);
+		else
+			freed += zap_pte_range(tlb, pmd, address, end - address);
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);
@@ -403,11 +423,10 @@
	spin_unlock(&mm->page_table_lock);
 }

-
 /*
 * Do a quick page-table lookup for a single page.
 */
-static struct page * follow_page(unsigned long address, int write)
+struct page * follow_page(unsigned long address, int write)
 {
	pgd_t *pgd;
	pmd_t *pmd;
@@ -418,6 +437,8 @@
		goto out;

	pmd = pmd_offset(pgd, address);
+	if (pmd_bigpage(*pmd))
+		return pmd_page(*pmd) + (address & BIGPAGE_MASK) / PAGE_SIZE;
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		goto out;

@@ -1381,6 +1402,40 @@
	return 1;
 }

+static int handle_bigpage_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long long address, int write_access, pmd_t *pmd)
+{
+	struct inode *inode = vma->vm_file->f_dentry->d_inode;
+	struct page *new_page;
+
+	spin_unlock(&mm->page_table_lock);
+	down_read(&inode->i_truncate_sem);
+	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access);
+	up_read(&inode->i_truncate_sem);
+	spin_lock(&mm->page_table_lock);
+
+	if (!new_page)
+		return 0;
+	if (new_page == NOPAGE_OOM)
+		return -1;
+	if (!BigPage(new_page))
+		BUG();
+	/*
+	 * Another context was faster.
+	 */
+	if (pmd_present(*pmd)) {
+		if (pmd_bigpage(*pmd))
+			return 1;
+		free_one_pmd(pmd);
+	}
+	/*
+	 * Major fault.
+	 */
+	pmd_populate_bigpage(mm, pmd, new_page);
+	flush_tlb_page(vma, address & PMD_MASK);
+	update_mmu_cache(vma, address, entry);
+	return 2;
+}
+
 /*
 * By the time we get here, we already hold the mm semaphore
 */
@@ -1390,6 +1445,7 @@
	int ret = -1;
	pgd_t *pgd;
	pmd_t *pmd;
+	int bigpage = (vma->vm_flags & VM_BIGPAGE);

	current->state = TASK_RUNNING;
	pgd = pgd_offset(mm, address);
@@ -1401,13 +1457,15 @@
	spin_lock(&mm->page_table_lock);
	pmd = pmd_alloc(mm, pgd, address);

-	if (pmd) {
+	if (pmd && !bigpage) {
		pte_t * pte = pte_alloc_map(mm, pmd, address);
		if (pte) {
			ret = handle_pte_fault(mm, vma, address, write_access, &pte, pmd);
			pte_unmap(pte);
		}
-	}
+	} else
+	if (pmd)
+		ret = handle_bigpage_fault(mm, vma, address, write_access, pmd);
	spin_unlock(&mm->page_table_lock);
	return ret;
 }
@@ -1712,6 +1770,8 @@
	if (!pgd_none(*pgd)) {
		pmd = pmd_offset(pgd, addr);
		if (!pmd_none(*pmd)) {
+			if (pmd_bigpage(*pmd))
+				BUG();
			ptep = pte_offset_map(pmd, addr);
			pte = *ptep;
			if (pte_present(pte))
--- linux/mm/mremap.c.orig	Sun Feb 24 19:59:04 2002
+++ linux/mm/mremap.c	Mon Feb 25 18:17:48 2002
@@ -245,6 +245,11 @@
	vma = find_vma(current->mm, addr);
	if (!vma || vma->vm_start > addr)
		goto out;
+	/*
+	 * Do not remap bigpages, yet.
+	 */
+	if (vma->vm_flags & VM_BIGPAGE)
+		goto out;
	/* We can't remap across vm area boundaries */
	if (old_len > vma->vm_end - addr)
		goto out;
--- linux/mm/mprotect.c.orig	Sun Feb 24 20:04:35 2002
+++ linux/mm/mprotect.c	Wed Mar 6 00:55:20 2002
@@ -21,6 +21,9 @@

	if (pmd_none(*pmd))
		return;
+	// FIXME: we now silently 'succeed', we should split up the bigpage.
+	if (pmd_bigpage(*pmd))
+		return;
	if (pmd_bad(*pmd)) {
		pmd_ERROR(*pmd);
		pmd_clear(pmd);
@@ -250,6 +253,10 @@
	vma = find_vma(current->mm, start);
	error = -EFAULT;
	if (!vma || vma->vm_start > start)
		goto out;
+
+	error = 0;
+	if (vma->vm_flags & VM_BIGPAGE)
+		goto out;

	for (nstart = start ; ; ) {
--- linux/mm/Makefile.orig	Sun Feb 24 22:05:59 2002
+++ linux/mm/Makefile	Sun Feb 24 13:04:26 2002
@@ -14,7 +14,7 @@
 obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
	 vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
	 page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \
-	 shmem.o
+	 shmem.o bigpages.o

 obj-$(CONFIG_HIGHMEM) += highmem.o
 obj-y += wtd.o
--- linux/include/linux/mm.h.orig	Sun Feb 24 13:57:11 2002
+++ linux/include/linux/mm.h	Thu Mar 7 04:48:18 2002
@@ -102,6 +102,8 @@
 #define VM_DONTCOPY	0x00020000	/* Do not copy this vma on fork */
 #define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
 #define VM_RESERVED	0x00080000	/* Don't unmap it from swap_out */
+#define VM_BIGPAGE	0x00100000	/* bigpage mappings, no pte's */
+#define VM_BIGMAP	0x00200000	/* user wants bigpage mapping */

 #define VM_STACK_FLAGS	0x00000177

@@ -286,6 +288,7 @@
 #define PG_inactive_clean	11
 #define PG_highmem		12
 #define PG_checked		13	/* kill me in 2.5.<early>. */
+#define PG_bigpage		14
 /* bits 21-29 unused */
 #define PG_arch_1		30
 #define PG_reserved		31
@@ -302,6 +305,7 @@
 #define TryLockPage(page)	test_and_set_bit(PG_locked, &(page)->flags)
 #define PageChecked(page)	test_bit(PG_checked, &(page)->flags)
 #define SetPageChecked(page)	set_bit(PG_checked, &(page)->flags)
+#define BigPage(page)		test_bit(PG_bigpage, &(page)->flags)

 extern void __set_page_dirty(struct page *);

@@ -424,6 +428,10 @@
 * The old interface name will be removed in 2.5:
 */
 #define get_free_page get_zeroed_page
+
+extern long nr_bigpages;
+extern struct page * FASTCALL(alloc_bigpage(void));
+extern void FASTCALL(free_bigpage(struct page *page));

 /*
 * There is only one 'core' page-freeing function.
--- linux/include/linux/shmem_fs.h.orig	Sun Feb 24 13:06:31 2002
+++ linux/include/linux/shmem_fs.h	Tue Mar 5 22:34:13 2002
@@ -19,7 +19,7 @@

 extern atomic_t shmem_nrpages;

-struct shmem_inode_info {
+typedef struct shmem_inode_info {
	spinlock_t	lock;
	struct semaphore sem;
	unsigned long	next_index;
@@ -29,7 +29,9 @@
	int		locked;	/* into memory */
	struct list_head list;
	struct inode	*inode;
-};
+	unsigned long	max_bigpages;
+	struct page	**bigpages;
+} shmem_info_t;

 struct shmem_sb_info {
	unsigned long max_blocks;	/* How many blocks are allowed */
@@ -39,6 +41,19 @@
	spinlock_t stat_lock;
 };

-#define SHMEM_I(inode) (&inode->u.shmem_i)
+#define SHMEM_I(inode)		(&inode->u.shmem_i)
+#define I_BIGPAGE(inode)	(SHMEM_I(inode)->bigpages)
+
+#define SHMEM_IOC_SETBIGPAGES	_IO('f', 1)
+
+extern size_t shm_ctlmax;
+extern void shm_enable_bigpages(struct inode *inode, struct file *filp, size_t size, int prealloc);
+extern int shm_use_bigpages;
+extern struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int no_share);
+extern int shmem_make_bigpage_mmap(struct file * file, struct vm_area_struct * vma);
+
+extern void check_fault_page(struct vm_area_struct *vma, unsigned long addr);
+extern struct page * follow_page(unsigned long address, int write);
+extern int shmem_munmap(struct vm_area_struct * vma, unsigned long addr, size_t size);

 #endif
--- linux/include/linux/fs.h.orig	Mon Feb 25 13:22:41 2002
+++ linux/include/linux/fs.h	Thu Mar 7 04:48:15 2002
@@ -855,6 +855,7 @@
	ssize_t (*aio_readx)(struct file *, struct kiocb *, struct iocb);
	ssize_t (*aio_write)(struct file *, struct kiocb *, struct iocb);
	ssize_t (*aio_fsync)(struct file *, struct kiocb *, struct iocb);
+	int (*munmap) (struct vm_area_struct *, unsigned long, size_t);
 };

 struct inode_operations {
--- linux/include/linux/sysctl.h.orig	Tue Feb 26 09:36:17 2002
+++ linux/include/linux/sysctl.h	Tue Mar 5 19:41:47 2002
@@ -125,6 +125,7 @@
	KERN_CADPID=54,		/* int: PID of the process to notify on CAD */
	KERN_TAINTED=55,	/* int: various kernel tainted flags */
	KERN_CHILD_RUNS_FIRST=56, /* int: child-runs-first forking */
+	KERN_SHMUSEBIGPAGES=57,	/* int: use bigpages wherever possible */
 };


--- linux/include/asm-i386/pgtable.h.orig	Sun Feb 24 19:36:09 2002
+++ linux/include/asm-i386/pgtable.h	Thu Mar 7 04:48:15 2002
@@ -262,6 +262,14 @@
 #define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
 #define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
 #define pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+#define pmd_bigpage(x)	(pmd_val(x) & _PAGE_PSE)
+
+#define BIGPAGE_SHIFT	(PMD_SHIFT)
+#define BIGPAGE_SIZE	(1UL << BIGPAGE_SHIFT)
+#define BIGPAGE_MASK	(BIGPAGE_SIZE - 1)
+#define BIGPAGE_PAGES	(BIGPAGE_SIZE / PAGE_SIZE)
+
+#define HAVE_ARCH_BIGPAGES	cpu_has_pse

 /*
 * Permanent address of a page. Obviously must never be
--- linux/include/asm-i386/pgalloc.h.orig	Sun Feb 24 18:12:48 2002
+++ linux/include/asm-i386/pgalloc.h	Thu Mar 7 04:48:15 2002
@@ -16,6 +16,18 @@
		((unsigned long long)(pte - mem_map) <<
			(unsigned long long) PAGE_SHIFT)));
 }
+
+static inline void pmd_populate_bigpage(struct mm_struct *mm, pmd_t *pmd, struct page *page)
+{
+	unsigned int idx = page - mem_map;
+
+	if (idx & ((1 << (BIGPAGE_SHIFT - PAGE_SHIFT)) -1)) {
+		printk("ugh, page idx %d (%p) cannot be PSE page!\n", idx, page);
+		BUG();
+	}
+	set_pmd(pmd, __pmd(_PAGE_TABLE + _PAGE_PSE +
+		((unsigned long long)idx << (unsigned long long) PAGE_SHIFT)));
+}
 /*
 * Allocate and free page tables.
 */
--- linux/include/asm-i386/mman.h.orig	Mon Feb 25 12:16:26 2002
+++ linux/include/asm-i386/mman.h	Mon Feb 25 12:32:19 2002
@@ -11,6 +11,7 @@
 #define MAP_TYPE	0x0f		/* Mask for type of mapping */
 #define MAP_FIXED	0x10		/* Interpret addr exactly */
 #define MAP_ANONYMOUS	0x20		/* don't use a file */
+#define MAP_BIGPAGE	0x40		/* bigpage mapping */

 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
--- linux/include/asm-i386/atomic.h.orig	Thu Feb 28 11:43:34 2002
+++ linux/include/asm-i386/atomic.h	Thu Mar 7 04:48:15 2002
@@ -2,6 +2,7 @@
 #define __ARCH_I386_ATOMIC__

 #include <linux/config.h>
+#include <asm/page.h>

 /*
 * Atomic operations that C can't guarantee us. Useful for
--- linux/ipc/shm.c.orig	Tue Feb 26 08:39:46 2002
+++ linux/ipc/shm.c	Wed Mar 6 11:15:40 2002
@@ -124,6 +124,10 @@
	shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
	shm_rmid (shp->id);
	shmem_lock(shp->shm_file, 0);
+	/*
+	 * Unlock the spinlock first, fput() might sleep.
+	 */
+	shm_unlock(shp->id);
	fput (shp->shm_file);
	kfree (shp);
 }
@@ -149,22 +153,30 @@
	shp->shm_nattch--;
	if(shp->shm_nattch == 0 &&
	   shp->shm_flags & SHM_DEST)
-		shm_destroy (shp);
-
-	shm_unlock(id);
+		shm_destroy(shp);	/* unlocks */
+	else
+		shm_unlock(id);
	up (&shm_ids.sem);
 }

 static int shm_mmap(struct file * file, struct vm_area_struct * vma)
 {
+	int error;
+
	UPDATE_ATIME(file->f_dentry->d_inode);
+	if (SHMEM_I(file->f_dentry->d_inode)->bigpages) {
+		error = shmem_make_bigpage_mmap(file, vma);
+		if (error)
+			return -EINVAL;
+	}
	vma->vm_ops = &shm_vm_ops;
	shm_inc(file->f_dentry->d_inode->i_ino);
	return 0;
 }

 static struct file_operations shm_file_operations = {
-	mmap:	shm_mmap
+	mmap:	shm_mmap,
+	munmap:	shmem_munmap
 };

 static struct vm_operations_struct shm_vm_ops = {
@@ -197,10 +209,16 @@
	if (IS_ERR(file))
		goto no_file;

+	file->f_op = &shm_file_operations;
+	error = -ENOMEM;
+	if (shm_use_bigpages)
+		shm_enable_bigpages(file->f_dentry->d_inode, file, size, 1);
+
	error = -ENOSPC;
	id = shm_addid(shp);
	if(id == -1)
		goto no_id;
+
	shp->shm_perm.key = key;
	shp->shm_flags = (shmflg & S_IRWXUGO);
	shp->shm_cprid = current->pid;
@@ -212,7 +230,6 @@
	shp->id = shm_buildid(id,shp->shm_perm.seq);
	shp->shm_file = file;
	file->f_dentry->d_inode->i_ino = shp->id;
-	file->f_op = &shm_file_operations;
	shm_tot += numpages;
	shm_unlock (id);
	return shp->id;
@@ -511,11 +528,11 @@
		shp->shm_flags |= SHM_DEST;
		/* Do not find it any more */
		shp->shm_perm.key = IPC_PRIVATE;
+		/* Unlock */
+		shm_unlock(shmid);
	} else
-		shm_destroy (shp);
+		shm_destroy (shp);	/* unlocks */

-	/* Unlock */
-	shm_unlock(shmid);
	up(&shm_ids.sem);
	return err;
 }
@@ -631,8 +648,9 @@
	shp->shm_nattch--;
	if(shp->shm_nattch == 0 &&
	   shp->shm_flags & SHM_DEST)
-		shm_destroy (shp);
-	shm_unlock(shmid);
+		shm_destroy (shp);	/* unlocks */
+	else
+		shm_unlock(shmid);
	up (&shm_ids.sem);

	*raddr = (unsigned long) user_addr;
@@ -649,18 +667,19 @@
 */
 asmlinkage long sys_shmdt (char *shmaddr)
 {
+	int error = -EINVAL;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *shmd, *shmdnext;

	down_write(&mm->mmap_sem);
	for (shmd = mm->mmap; shmd; shmd = shmdnext) {
		shmdnext = shmd->vm_next;
-		if (shmd->vm_ops == &shm_vm_ops
-		    && shmd->vm_start - (shmd->vm_pgoff << PAGE_SHIFT) == (ulong) shmaddr)
-			do_munmap(mm, shmd->vm_start, shmd->vm_end - shmd->vm_start);
+		if ((shmd->vm_ops == &shm_vm_ops) &&
+				(shmd->vm_start == (ulong) shmaddr))
+			error = do_munmap(mm, shmd->vm_start, shmd->vm_end - shmd->vm_start);
	}
	up_write(&mm->mmap_sem);
-	return 0;
+	return error;
 }

 #ifdef CONFIG_PROC_FS
--- linux/mm/mmap.c~	Tue Mar 26 10:59:01 2002
+++ linux/mm/mmap.c	Tue Mar 26 17:00:18 2002
@@ -1070,9 +1070,9 @@
		mm->map_count--;
		__remove_shared_vm_struct(next);
		spin_unlock(&mm->page_table_lock);
-		if (next->vm_ops && next->vm_ops->close)
-			next->vm_ops->close(vma);
		unlock_vma_mappings(vma);
+		if (next->vm_ops && next->vm_ops->close)
+			next->vm_ops->close(next);
		if (next->vm_file) {
			fput(next->vm_file);
			next->vm_file = NULL;