6154-const-ify-segment-ops-structures Wdiff usr/src/uts/common/vm/seg_kmem.c

Print this page

6154 const-ify segment ops structures

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/vm/seg_kmem.c
          +++ new/usr/src/uts/common/vm/seg_kmem.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   */
  24   24  
  25   25  #include <sys/types.h>
  26   26  #include <sys/t_lock.h>
  27   27  #include <sys/param.h>
  28   28  #include <sys/sysmacros.h>
  29   29  #include <sys/tuneable.h>
  30   30  #include <sys/systm.h>
  31   31  #include <sys/vm.h>
  32   32  #include <sys/kmem.h>
  33   33  #include <sys/vmem.h>
  34   34  #include <sys/mman.h>
  35   35  #include <sys/cmn_err.h>
  36   36  #include <sys/debug.h>
  37   37  #include <sys/dumphdr.h>
  38   38  #include <sys/bootconf.h>
  39   39  #include <sys/lgrp.h>
  40   40  #include <vm/seg_kmem.h>
  41   41  #include <vm/hat.h>
  42   42  #include <vm/page.h>
  43   43  #include <vm/vm_dep.h>
  44   44  #include <vm/faultcode.h>
  45   45  #include <sys/promif.h>
  46   46  #include <vm/seg_kp.h>
  47   47  #include <sys/bitmap.h>
  48   48  #include <sys/mem_cage.h>
  49   49  
  50   50  #ifdef __sparc
  51   51  #include <sys/ivintr.h>
  52   52  #include <sys/panic.h>
  53   53  #endif
  54   54  
  55   55  /*
  56   56   * seg_kmem is the primary kernel memory segment driver.  It
  57   57   * maps the kernel heap [kernelheap, ekernelheap), module text,
  58   58   * and all memory which was allocated before the VM was initialized
  59   59   * into kas.
  60   60   *
  61   61   * Pages which belong to seg_kmem are hashed into &kvp vnode at
  62   62   * an offset equal to (u_offset_t)virt_addr, and have p_lckcnt >= 1.
  63   63   * They must never be paged out since segkmem_fault() is a no-op to
  64   64   * prevent recursive faults.
  65   65   *
  66   66   * Currently, seg_kmem pages are sharelocked (p_sharelock == 1) on
  67   67   * __x86 and are unlocked (p_sharelock == 0) on __sparc.  Once __x86
  68   68   * supports relocation the #ifdef kludges can be removed.
  69   69   *
  70   70   * seg_kmem pages may be subject to relocation by page_relocate(),
  71   71   * provided that the HAT supports it; if this is so, segkmem_reloc
  72   72   * will be set to a nonzero value. All boot time allocated memory as
  73   73   * well as static memory is considered off limits to relocation.
  74   74   * Pages are "relocatable" if p_state does not have P_NORELOC set, so
  75   75   * we request P_NORELOC pages for memory that isn't safe to relocate.
  76   76   *
  77   77   * The kernel heap is logically divided up into four pieces:
  78   78   *
  79   79   *   heap32_arena is for allocations that require 32-bit absolute
  80   80   *   virtual addresses (e.g. code that uses 32-bit pointers/offsets).
  81   81   *
  82   82   *   heap_core is for allocations that require 2GB *relative*
  83   83   *   offsets; in other words all memory from heap_core is within
  84   84   *   2GB of all other memory from the same arena. This is a requirement
  85   85   *   of the addressing modes of some processors in supervisor code.
  86   86   *
  87   87   *   heap_arena is the general heap arena.
  88   88   *
  89   89   *   static_arena is the static memory arena.  Allocations from it
  90   90   *   are not subject to relocation so it is safe to use the memory
  91   91   *   physical address as well as the virtual address (e.g. the VA to
  92   92   *   PA translations are static).  Caches may import from static_arena;
  93   93   *   all other static memory allocations should use static_alloc_arena.
  94   94   *
  95   95   * On some platforms which have limited virtual address space, seg_kmem
  96   96   * may share [kernelheap, ekernelheap) with seg_kp; if this is so,
  97   97   * segkp_bitmap is non-NULL, and each bit represents a page of virtual
  98   98   * address space which is actually seg_kp mapped.
  99   99   */
 100  100  
 101  101  extern ulong_t *segkp_bitmap;   /* Is set if segkp is from the kernel heap */
 102  102  
 103  103  char *kernelheap;               /* start of primary kernel heap */
 104  104  char *ekernelheap;              /* end of primary kernel heap */
 105  105  struct seg kvseg;               /* primary kernel heap segment */
 106  106  struct seg kvseg_core;          /* "core" kernel heap segment */
 107  107  struct seg kzioseg;             /* Segment for zio mappings */
 108  108  vmem_t *heap_arena;             /* primary kernel heap arena */
 109  109  vmem_t *heap_core_arena;        /* core kernel heap arena */
 110  110  char *heap_core_base;           /* start of core kernel heap arena */
 111  111  char *heap_lp_base;             /* start of kernel large page heap arena */
 112  112  char *heap_lp_end;              /* end of kernel large page heap arena */
 113  113  vmem_t *hat_memload_arena;      /* HAT translation data */
 114  114  struct seg kvseg32;             /* 32-bit kernel heap segment */
 115  115  vmem_t *heap32_arena;           /* 32-bit kernel heap arena */
 116  116  vmem_t *heaptext_arena;         /* heaptext arena */
 117  117  struct as kas;                  /* kernel address space */
 118  118  int segkmem_reloc;              /* enable/disable relocatable segkmem pages */
 119  119  vmem_t *static_arena;           /* arena for caches to import static memory */
 120  120  vmem_t *static_alloc_arena;     /* arena for allocating static memory */
 121  121  vmem_t *zio_arena = NULL;       /* arena for allocating zio memory */
 122  122  vmem_t *zio_alloc_arena = NULL; /* arena for allocating zio memory */
 123  123  
 124  124  /*
 125  125   * seg_kmem driver can map part of the kernel heap with large pages.
 126  126   * Currently this functionality is implemented for sparc platforms only.
 127  127   *
 128  128   * The large page size "segkmem_lpsize" for kernel heap is selected in the
 129  129   * platform specific code. It can also be modified via /etc/system file.
 130  130   * Setting segkmem_lpsize to PAGESIZE in /etc/system disables usage of large
 131  131   * pages for kernel heap. "segkmem_lpshift" is adjusted appropriately to
 132  132   * match segkmem_lpsize.
 133  133   *
 134  134   * At boot time we carve from kernel heap arena a range of virtual addresses
 135  135   * that will be used for large page mappings. This range [heap_lp_base,
 136  136   * heap_lp_end) is set up as a separate vmem arena - "heap_lp_arena". We also
 137  137   * create "kmem_lp_arena" that caches memory already backed up by large
 138  138   * pages. kmem_lp_arena imports virtual segments from heap_lp_arena.
 139  139   */
 140  140  
 141  141  size_t  segkmem_lpsize;
 142  142  static  uint_t  segkmem_lpshift = PAGESHIFT;
 143  143  int     segkmem_lpszc = 0;
 144  144  
 145  145  size_t  segkmem_kmemlp_quantum = 0x400000;      /* 4MB */
 146  146  size_t  segkmem_heaplp_quantum;
 147  147  vmem_t *heap_lp_arena;
 148  148  static  vmem_t *kmem_lp_arena;
 149  149  static  vmem_t *segkmem_ppa_arena;
 150  150  static  segkmem_lpcb_t segkmem_lpcb;
 151  151  
 152  152  /*
 153  153   * We use "segkmem_kmemlp_max" to limit the total amount of physical memory
 154  154   * consumed by the large page heap. By default this parameter is set to 1/8 of
 155  155   * physmem but can be adjusted through /etc/system either directly or
 156  156   * indirectly by setting "segkmem_kmemlp_pcnt" to the percent of physmem
 157  157   * we allow for large page heap.
 158  158   */
 159  159  size_t  segkmem_kmemlp_max;
 160  160  static  uint_t  segkmem_kmemlp_pcnt;
 161  161  
 162  162  /*
 163  163   * Getting large pages for kernel heap could be problematic due to
  164  164   * physical memory fragmentation. That's why we allow preallocating
 165  165   * "segkmem_kmemlp_min" bytes at boot time.
 166  166   */
 167  167  static  size_t  segkmem_kmemlp_min;
 168  168  
 169  169  /*
 170  170   * Throttling is used to avoid expensive tries to allocate large pages
  171  171   * for kernel heap when a lot of successive attempts to do so fail.
 172  172   */
 173  173  static  ulong_t segkmem_lpthrottle_max = 0x400000;
 174  174  static  ulong_t segkmem_lpthrottle_start = 0x40;
 175  175  static  ulong_t segkmem_use_lpthrottle = 1;
 176  176  
 177  177  /*
 178  178   * Freed pages accumulate on a garbage list until segkmem is ready,
 179  179   * at which point we call segkmem_gc() to free it all.
 180  180   */
 181  181  typedef struct segkmem_gc_list {
 182  182          struct segkmem_gc_list  *gc_next;
 183  183          vmem_t                  *gc_arena;
 184  184          size_t                  gc_size;
 185  185  } segkmem_gc_list_t;
 186  186  
 187  187  static segkmem_gc_list_t *segkmem_gc_list;
 188  188  
 189  189  /*
 190  190   * Allocations from the hat_memload arena add VM_MEMLOAD to their
 191  191   * vmflags so that segkmem_xalloc() can inform the hat layer that it needs
 192  192   * to take steps to prevent infinite recursion.  HAT allocations also
 193  193   * must be non-relocatable to prevent recursive page faults.
 194  194   */
 195  195  static void *
 196  196  hat_memload_alloc(vmem_t *vmp, size_t size, int flags)
 197  197  {
 198  198          flags |= (VM_MEMLOAD | VM_NORELOC);
 199  199          return (segkmem_alloc(vmp, size, flags));
 200  200  }
 201  201  
 202  202  /*
 203  203   * Allocations from static_arena arena (or any other arena that uses
 204  204   * segkmem_alloc_permanent()) require non-relocatable (permanently
 205  205   * wired) memory pages, since these pages are referenced by physical
 206  206   * as well as virtual address.
 207  207   */
 208  208  void *
 209  209  segkmem_alloc_permanent(vmem_t *vmp, size_t size, int flags)
 210  210  {
 211  211          return (segkmem_alloc(vmp, size, flags | VM_NORELOC));
 212  212  }
 213  213  
 214  214  /*
 215  215   * Initialize kernel heap boundaries.
 216  216   */
 217  217  void
 218  218  kernelheap_init(
 219  219          void *heap_start,
 220  220          void *heap_end,
 221  221          char *first_avail,
 222  222          void *core_start,
 223  223          void *core_end)
 224  224  {
 225  225          uintptr_t textbase;
 226  226          size_t core_size;
 227  227          size_t heap_size;
 228  228          vmem_t *heaptext_parent;
 229  229          size_t  heap_lp_size = 0;
 230  230  #ifdef __sparc
 231  231          size_t kmem64_sz = kmem64_aligned_end - kmem64_base;
 232  232  #endif  /* __sparc */
 233  233  
 234  234          kernelheap = heap_start;
 235  235          ekernelheap = heap_end;
 236  236  
 237  237  #ifdef __sparc
 238  238          heap_lp_size = (((uintptr_t)heap_end - (uintptr_t)heap_start) / 4);
 239  239          /*
 240  240           * Bias heap_lp start address by kmem64_sz to reduce collisions
 241  241           * in 4M kernel TSB between kmem64 area and heap_lp
 242  242           */
 243  243          kmem64_sz = P2ROUNDUP(kmem64_sz, MMU_PAGESIZE256M);
 244  244          if (kmem64_sz <= heap_lp_size / 2)
 245  245                  heap_lp_size -= kmem64_sz;
 246  246          heap_lp_base = ekernelheap - heap_lp_size;
 247  247          heap_lp_end = heap_lp_base + heap_lp_size;
 248  248  #endif  /* __sparc */
 249  249  
 250  250          /*
 251  251           * If this platform has a 'core' heap area, then the space for
 252  252           * overflow module text should be carved out of the end of that
 253  253           * heap.  Otherwise, it gets carved out of the general purpose
 254  254           * heap.
 255  255           */
 256  256          core_size = (uintptr_t)core_end - (uintptr_t)core_start;
 257  257          if (core_size > 0) {
 258  258                  ASSERT(core_size >= HEAPTEXT_SIZE);
 259  259                  textbase = (uintptr_t)core_end - HEAPTEXT_SIZE;
 260  260                  core_size -= HEAPTEXT_SIZE;
 261  261          }
 262  262  #ifndef __sparc
 263  263          else {
 264  264                  ekernelheap -= HEAPTEXT_SIZE;
 265  265                  textbase = (uintptr_t)ekernelheap;
 266  266          }
 267  267  #endif
 268  268  
 269  269          heap_size = (uintptr_t)ekernelheap - (uintptr_t)kernelheap;
 270  270          heap_arena = vmem_init("heap", kernelheap, heap_size, PAGESIZE,
 271  271              segkmem_alloc, segkmem_free);
 272  272  
 273  273          if (core_size > 0) {
 274  274                  heap_core_arena = vmem_create("heap_core", core_start,
 275  275                      core_size, PAGESIZE, NULL, NULL, NULL, 0, VM_SLEEP);
 276  276                  heap_core_base = core_start;
 277  277          } else {
 278  278                  heap_core_arena = heap_arena;
 279  279                  heap_core_base = kernelheap;
 280  280          }
 281  281  
 282  282          /*
 283  283           * reserve space for the large page heap. If large pages for kernel
 284  284           * heap is enabled large page heap arean will be created later in the
 285  285           * boot sequence in segkmem_heap_lp_init(). Otherwise the allocated
 286  286           * range will be returned back to the heap_arena.
 287  287           */
 288  288          if (heap_lp_size) {
 289  289                  (void) vmem_xalloc(heap_arena, heap_lp_size, PAGESIZE, 0, 0,
 290  290                      heap_lp_base, heap_lp_end,
 291  291                      VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
 292  292          }
 293  293  
 294  294          /*
 295  295           * Remove the already-spoken-for memory range [kernelheap, first_avail).
 296  296           */
 297  297          (void) vmem_xalloc(heap_arena, first_avail - kernelheap, PAGESIZE,
 298  298              0, 0, kernelheap, first_avail, VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
 299  299  
 300  300  #ifdef __sparc
 301  301          heap32_arena = vmem_create("heap32", (void *)SYSBASE32,
 302  302              SYSLIMIT32 - SYSBASE32 - HEAPTEXT_SIZE, PAGESIZE, NULL,
 303  303              NULL, NULL, 0, VM_SLEEP);
 304  304          /*
 305  305           * Prom claims the physical and virtual resources used by panicbuf
 306  306           * and inter_vec_table. So reserve space for panicbuf, intr_vec_table,
 307  307           * reserved interrupt vector data structures from 32-bit heap.
 308  308           */
 309  309          (void) vmem_xalloc(heap32_arena, PANICBUFSIZE, PAGESIZE, 0, 0,
 310  310              panicbuf, panicbuf + PANICBUFSIZE,
 311  311              VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
 312  312  
 313  313          (void) vmem_xalloc(heap32_arena, IVSIZE, PAGESIZE, 0, 0,
 314  314              intr_vec_table, (caddr_t)intr_vec_table + IVSIZE,
 315  315              VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
 316  316  
 317  317          textbase = SYSLIMIT32 - HEAPTEXT_SIZE;
 318  318          heaptext_parent = NULL;
 319  319  #else   /* __sparc */
 320  320          heap32_arena = heap_core_arena;
 321  321          heaptext_parent = heap_core_arena;
 322  322  #endif  /* __sparc */
 323  323  
 324  324          heaptext_arena = vmem_create("heaptext", (void *)textbase,
 325  325              HEAPTEXT_SIZE, PAGESIZE, NULL, NULL, heaptext_parent, 0, VM_SLEEP);
 326  326  
 327  327          /*
 328  328           * Create a set of arenas for memory with static translations
 329  329           * (e.g. VA -> PA translations cannot change).  Since using
 330  330           * kernel pages by physical address implies it isn't safe to
 331  331           * walk across page boundaries, the static_arena quantum must
 332  332           * be PAGESIZE.  Any kmem caches that require static memory
 333  333           * should source from static_arena, while direct allocations
 334  334           * should only use static_alloc_arena.
 335  335           */
 336  336          static_arena = vmem_create("static", NULL, 0, PAGESIZE,
 337  337              segkmem_alloc_permanent, segkmem_free, heap_arena, 0, VM_SLEEP);
 338  338          static_alloc_arena = vmem_create("static_alloc", NULL, 0,
 339  339              sizeof (uint64_t), vmem_alloc, vmem_free, static_arena,
 340  340              0, VM_SLEEP);
 341  341  
 342  342          /*
 343  343           * Create an arena for translation data (ptes, hmes, or hblks).
 344  344           * We need an arena for this because hat_memload() is essential
 345  345           * to vmem_populate() (see comments in common/os/vmem.c).
 346  346           *
 347  347           * Note: any kmem cache that allocates from hat_memload_arena
 348  348           * must be created as a KMC_NOHASH cache (i.e. no external slab
 349  349           * and bufctl structures to allocate) so that slab creation doesn't
 350  350           * require anything more than a single vmem_alloc().
 351  351           */
 352  352          hat_memload_arena = vmem_create("hat_memload", NULL, 0, PAGESIZE,
 353  353              hat_memload_alloc, segkmem_free, heap_arena, 0,
 354  354              VM_SLEEP | VMC_POPULATOR | VMC_DUMPSAFE);
 355  355  }
 356  356  
 357  357  void
 358  358  boot_mapin(caddr_t addr, size_t size)
 359  359  {
 360  360          caddr_t  eaddr;
 361  361          page_t  *pp;
 362  362          pfn_t    pfnum;
 363  363  
 364  364          if (page_resv(btop(size), KM_NOSLEEP) == 0)
 365  365                  panic("boot_mapin: page_resv failed");
 366  366  
 367  367          for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
 368  368                  pfnum = va_to_pfn(addr);
 369  369                  if (pfnum == PFN_INVALID)
 370  370                          continue;
 371  371                  if ((pp = page_numtopp_nolock(pfnum)) == NULL)
 372  372                          panic("boot_mapin(): No pp for pfnum = %lx", pfnum);
 373  373  
 374  374                  /*
 375  375                   * must break up any large pages that may have constituent
 376  376                   * pages being utilized for BOP_ALLOC()'s before calling
 377  377                   * page_numtopp().The locking code (ie. page_reclaim())
 378  378                   * can't handle them
 379  379                   */
 380  380                  if (pp->p_szc != 0)
 381  381                          page_boot_demote(pp);
 382  382  
 383  383                  pp = page_numtopp(pfnum, SE_EXCL);
 384  384                  if (pp == NULL || PP_ISFREE(pp))
 385  385                          panic("boot_alloc: pp is NULL or free");
 386  386  
 387  387                  /*
 388  388                   * If the cage is on but doesn't yet contain this page,
 389  389                   * mark it as non-relocatable.
 390  390                   */
 391  391                  if (kcage_on && !PP_ISNORELOC(pp)) {
 392  392                          PP_SETNORELOC(pp);
 393  393                          PLCNT_XFER_NORELOC(pp);
 394  394                  }
 395  395  
 396  396                  (void) page_hashin(pp, &kvp, (u_offset_t)(uintptr_t)addr, NULL);
 397  397                  pp->p_lckcnt = 1;
 398  398  #if defined(__x86)
 399  399                  page_downgrade(pp);
 400  400  #else
 401  401                  page_unlock(pp);
 402  402  #endif
 403  403          }
 404  404  }
 405  405  
 406  406  /*
 407  407   * Get pages from boot and hash them into the kernel's vp.
 408  408   * Used after page structs have been allocated, but before segkmem is ready.
 409  409   */
 410  410  void *
 411  411  boot_alloc(void *inaddr, size_t size, uint_t align)
 412  412  {
 413  413          caddr_t addr = inaddr;
 414  414  
 415  415          if (bootops == NULL)
 416  416                  prom_panic("boot_alloc: attempt to allocate memory after "
 417  417                      "BOP_GONE");
 418  418  
 419  419          size = ptob(btopr(size));
 420  420  #ifdef __sparc
 421  421          if (bop_alloc_chunk(addr, size, align) != (caddr_t)addr)
 422  422                  panic("boot_alloc: bop_alloc_chunk failed");
 423  423  #else
 424  424          if (BOP_ALLOC(bootops, addr, size, align) != addr)
 425  425                  panic("boot_alloc: BOP_ALLOC failed");
 426  426  #endif
 427  427          boot_mapin((caddr_t)addr, size);
 428  428          return (addr);
 429  429  }
 430  430  
 431  431  static void
 432  432  segkmem_badop()
 433  433  {
 434  434          panic("segkmem_badop");
 435  435  }
 436  436  
 437  437  #define SEGKMEM_BADOP(t)        (t(*)())segkmem_badop
 438  438  
 439  439  /*ARGSUSED*/
 440  440  static faultcode_t
 441  441  segkmem_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t size,
 442  442          enum fault_type type, enum seg_rw rw)
 443  443  {
 444  444          pgcnt_t npages;
 445  445          spgcnt_t pg;
 446  446          page_t *pp;
 447  447          struct vnode *vp = seg->s_data;
 448  448  
 449  449          ASSERT(RW_READ_HELD(&seg->s_as->a_lock));
 450  450  
 451  451          if (seg->s_as != &kas || size > seg->s_size ||
 452  452              addr < seg->s_base || addr + size > seg->s_base + seg->s_size)
 453  453                  panic("segkmem_fault: bad args");
 454  454  
 455  455          /*
 456  456           * If it is one of segkp pages, call segkp_fault.
 457  457           */
 458  458          if (segkp_bitmap && seg == &kvseg &&
 459  459              BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
 460  460                  return (segop_fault(hat, segkp, addr, size, type, rw));
 461  461  
 462  462          if (rw != S_READ && rw != S_WRITE && rw != S_OTHER)
 463  463                  return (FC_NOSUPPORT);
 464  464  
 465  465          npages = btopr(size);
 466  466  
 467  467          switch (type) {
 468  468          case F_SOFTLOCK:        /* lock down already-loaded translations */
 469  469                  for (pg = 0; pg < npages; pg++) {
 470  470                          pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr,
 471  471                              SE_SHARED);
 472  472                          if (pp == NULL) {
 473  473                                  /*
 474  474                                   * Hmm, no page. Does a kernel mapping
 475  475                                   * exist for it?
 476  476                                   */
 477  477                                  if (!hat_probe(kas.a_hat, addr)) {
 478  478                                          addr -= PAGESIZE;
 479  479                                          while (--pg >= 0) {
 480  480                                                  pp = page_find(vp, (u_offset_t)
 481  481                                                      (uintptr_t)addr);
 482  482                                                  if (pp)
 483  483                                                          page_unlock(pp);
 484  484                                                  addr -= PAGESIZE;
 485  485                                          }
 486  486                                          return (FC_NOMAP);
 487  487                                  }
 488  488                          }
 489  489                          addr += PAGESIZE;
 490  490                  }
 491  491                  if (rw == S_OTHER)
 492  492                          hat_reserve(seg->s_as, addr, size);
 493  493                  return (0);
 494  494          case F_SOFTUNLOCK:
 495  495                  while (npages--) {
 496  496                          pp = page_find(vp, (u_offset_t)(uintptr_t)addr);
 497  497                          if (pp)
 498  498                                  page_unlock(pp);
 499  499                          addr += PAGESIZE;
 500  500                  }
 501  501                  return (0);
 502  502          default:
 503  503                  return (FC_NOSUPPORT);
 504  504          }
 505  505          /*NOTREACHED*/
 506  506  }
 507  507  
 508  508  static int
 509  509  segkmem_setprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
 510  510  {
 511  511          ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
 512  512  
 513  513          if (seg->s_as != &kas || size > seg->s_size ||
 514  514              addr < seg->s_base || addr + size > seg->s_base + seg->s_size)
 515  515                  panic("segkmem_setprot: bad args");
 516  516  
 517  517          /*
 518  518           * If it is one of segkp pages, call segkp.
 519  519           */
 520  520          if (segkp_bitmap && seg == &kvseg &&
 521  521              BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
 522  522                  return (segop_setprot(segkp, addr, size, prot));
 523  523  
 524  524          if (prot == 0)
 525  525                  hat_unload(kas.a_hat, addr, size, HAT_UNLOAD);
 526  526          else
 527  527                  hat_chgprot(kas.a_hat, addr, size, prot);
 528  528          return (0);
 529  529  }
 530  530  
 531  531  /*
 532  532   * This is a dummy segkmem function overloaded to call segkp
 533  533   * when segkp is under the heap.
 534  534   */
 535  535  /* ARGSUSED */
 536  536  static int
 537  537  segkmem_checkprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
 538  538  {
 539  539          ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
 540  540  
 541  541          if (seg->s_as != &kas)
 542  542                  segkmem_badop();
 543  543  
 544  544          /*
 545  545           * If it is one of segkp pages, call into segkp.
 546  546           */
 547  547          if (segkp_bitmap && seg == &kvseg &&
 548  548              BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
 549  549                  return (segop_checkprot(segkp, addr, size, prot));
 550  550  
 551  551          segkmem_badop();
 552  552          return (0);
 553  553  }
 554  554  
 555  555  /*
 556  556   * This is a dummy segkmem function overloaded to call segkp
 557  557   * when segkp is under the heap.
 558  558   */
 559  559  /* ARGSUSED */
 560  560  static int
 561  561  segkmem_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
 562  562  {
 563  563          ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
 564  564  
 565  565          if (seg->s_as != &kas)
 566  566                  segkmem_badop();
 567  567  
 568  568          /*
 569  569           * If it is one of segkp pages, call into segkp.
 570  570           */
 571  571          if (segkp_bitmap && seg == &kvseg &&
 572  572              BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
 573  573                  return (segop_kluster(segkp, addr, delta));
 574  574  
 575  575          segkmem_badop();
 576  576          return (0);
 577  577  }
 578  578  
 579  579  static void
 580  580  segkmem_xdump_range(void *arg, void *start, size_t size)
 581  581  {
 582  582          struct as *as = arg;
 583  583          caddr_t addr = start;
 584  584          caddr_t addr_end = addr + size;
 585  585  
 586  586          while (addr < addr_end) {
 587  587                  pfn_t pfn = hat_getpfnum(kas.a_hat, addr);
 588  588                  if (pfn != PFN_INVALID && pfn <= physmax && pf_is_memory(pfn))
 589  589                          dump_addpage(as, addr, pfn);
 590  590                  addr += PAGESIZE;
 591  591                  dump_timeleft = dump_timeout;
 592  592          }
 593  593  }
 594  594  
 595  595  static void
 596  596  segkmem_dump_range(void *arg, void *start, size_t size)
 597  597  {
 598  598          caddr_t addr = start;
 599  599          caddr_t addr_end = addr + size;
 600  600  
 601  601          /*
 602  602           * If we are about to start dumping the range of addresses we
 603  603           * carved out of the kernel heap for the large page heap walk
 604  604           * heap_lp_arena to find what segments are actually populated
 605  605           */
 606  606          if (SEGKMEM_USE_LARGEPAGES &&
 607  607              addr == heap_lp_base && addr_end == heap_lp_end &&
 608  608              vmem_size(heap_lp_arena, VMEM_ALLOC) < size) {
 609  609                  vmem_walk(heap_lp_arena, VMEM_ALLOC | VMEM_REENTRANT,
 610  610                      segkmem_xdump_range, arg);
 611  611          } else {
 612  612                  segkmem_xdump_range(arg, start, size);
 613  613          }
 614  614  }
 615  615  
 616  616  static void
 617  617  segkmem_dump(struct seg *seg)
 618  618  {
 619  619          /*
 620  620           * The kernel's heap_arena (represented by kvseg) is a very large
 621  621           * VA space, most of which is typically unused.  To speed up dumping
 622  622           * we use vmem_walk() to quickly find the pieces of heap_arena that
 623  623           * are actually in use.  We do the same for heap32_arena and
 624  624           * heap_core.
 625  625           *
 626  626           * We specify VMEM_REENTRANT to vmem_walk() because dump_addpage()
 627  627           * may ultimately need to allocate memory.  Reentrant walks are
 628  628           * necessarily imperfect snapshots.  The kernel heap continues
 629  629           * to change during a live crash dump, for example.  For a normal
 630  630           * crash dump, however, we know that there won't be any other threads
 631  631           * messing with the heap.  Therefore, at worst, we may fail to dump
 632  632           * the pages that get allocated by the act of dumping; but we will
 633  633           * always dump every page that was allocated when the walk began.
 634  634           *
 635  635           * The other segkmem segments are dense (fully populated), so there's
 636  636           * no need to use this technique when dumping them.
 637  637           *
 638  638           * Note: when adding special dump handling for any new sparsely-
 639  639           * populated segments, be sure to add similar handling to the ::kgrep
 640  640           * code in mdb.
 641  641           */
 642  642          if (seg == &kvseg) {
 643  643                  vmem_walk(heap_arena, VMEM_ALLOC | VMEM_REENTRANT,
 644  644                      segkmem_dump_range, seg->s_as);
 645  645  #ifndef __sparc
 646  646                  vmem_walk(heaptext_arena, VMEM_ALLOC | VMEM_REENTRANT,
 647  647                      segkmem_dump_range, seg->s_as);
 648  648  #endif
 649  649          } else if (seg == &kvseg_core) {
 650  650                  vmem_walk(heap_core_arena, VMEM_ALLOC | VMEM_REENTRANT,
 651  651                      segkmem_dump_range, seg->s_as);
 652  652          } else if (seg == &kvseg32) {
 653  653                  vmem_walk(heap32_arena, VMEM_ALLOC | VMEM_REENTRANT,
 654  654                      segkmem_dump_range, seg->s_as);
 655  655                  vmem_walk(heaptext_arena, VMEM_ALLOC | VMEM_REENTRANT,
 656  656                      segkmem_dump_range, seg->s_as);
 657  657          } else if (seg == &kzioseg) {
 658  658                  /*
 659  659                   * We don't want to dump pages attached to kzioseg since they
 660  660                   * contain file data from ZFS.  If this page's segment is
 661  661                   * kzioseg return instead of writing it to the dump device.
 662  662                   */
 663  663                  return;
 664  664          } else {
 665  665                  segkmem_dump_range(seg->s_as, seg->s_base, seg->s_size);
 666  666          }
 667  667  }
 668  668  
 669  669  /*
 670  670   * lock/unlock kmem pages over a given range [addr, addr+len).
 671  671   * Returns a shadow list of pages in ppp. If there are holes
 672  672   * in the range (e.g. some of the kernel mappings do not have
 673  673   * underlying page_ts) returns ENOTSUP so that as_pagelock()
 674  674   * will handle the range via as_fault(F_SOFTLOCK).
            *
            * For L_PAGEUNLOCK, every page in the list passed in through *ppp is
            * unlocked and the list itself is freed.  For L_PAGELOCK, a list of
            * btopr(len) page pointers is allocated with KM_NOSLEEP and each page
            * is looked up SE_SHARED; ENOTSUP is also returned (with *ppp set to
            * NULL) when that allocation fails.  Requests that land on segkp
            * pages inside kvseg are forwarded to segkp's pagelock operation.
 675  675   */
 676  676  /*ARGSUSED*/
 677  677  static int
 678  678  segkmem_pagelock(struct seg *seg, caddr_t addr, size_t len,
 679  679          page_t ***ppp, enum lock_type type, enum seg_rw rw)
 680  680  {
 681  681          page_t **pplist, *pp;
 682  682          pgcnt_t npages;
 683  683          spgcnt_t pg;
 684  684          size_t nb;
 685  685          struct vnode *vp = seg->s_data;
 686  686  
 687  687          ASSERT(ppp != NULL);
 688  688  
 689  689          /*
 690  690           * If it is one of segkp pages, call into segkp.
 691  691           */
 692  692          if (segkp_bitmap && seg == &kvseg &&
 693  693              BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
 694  694                  return (segop_pagelock(segkp, addr, len, ppp, type, rw));
 695  695  
 696  696          npages = btopr(len);
 697  697          nb = sizeof (page_t *) * npages;        /* size of shadow list */
 698  698  
 699  699          if (type == L_PAGEUNLOCK) {
 700  700                  pplist = *ppp;
 701  701                  ASSERT(pplist != NULL);
 702  702  
 703  703                  for (pg = 0; pg < npages; pg++) {
 704  704                          pp = pplist[pg];
 705  705                          page_unlock(pp);
 706  706                  }
 707  707                  kmem_free(pplist, nb);
 708  708                  return (0);
 709  709          }
 710  710  
 711  711          ASSERT(type == L_PAGELOCK);
 712  712  
 713  713          pplist = kmem_alloc(nb, KM_NOSLEEP);
 714  714          if (pplist == NULL) {
 715  715                  *ppp = NULL;
 716  716                  return (ENOTSUP);       /* take the slow path */
 717  717          }
 718  718  
 719  719          for (pg = 0; pg < npages; pg++) {
 720  720                  pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr, SE_SHARED);
 721  721                  if (pp == NULL) {
                                   /* hole: drop the locks taken so far and bail */
 722  722                          while (--pg >= 0)
 723  723                                  page_unlock(pplist[pg]);
 724  724                          kmem_free(pplist, nb);
 725  725                          *ppp = NULL;
 726  726                          return (ENOTSUP);
 727  727                  }
 728  728                  pplist[pg] = pp;
 729  729                  addr += PAGESIZE;
 730  730          }
 731  731  
 732  732          *ppp = pplist;
 733  733          return (0);
 734  734  }
 735  735  
 736  736  /*
 737  737   * This is a dummy segkmem function overloaded to call segkp
 738  738   * when segkp is under the heap.
            *
            * Only addresses that fall on a segkp page inside kvseg are served, by
            * forwarding to segkp's getmemid operation.  Every other request, and
            * any segment that does not belong to kas, goes to segkmem_badop().
 739  739   */
 740  740  /* ARGSUSED */
 741  741  static int
 742  742  segkmem_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
 743  743  {
 744  744          ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
 745  745  
 746  746          if (seg->s_as != &kas)
 747  747                  segkmem_badop();
 748  748  
 749  749          /*
 750  750           * If it is one of segkp pages, call into segkp.
 751  751           */
 752  752          if (segkp_bitmap && seg == &kvseg &&
 753  753              BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
 754  754                  return (segop_getmemid(segkp, addr, memidp));
 755  755  
 756  756          segkmem_badop();
 757  757          return (0);
 758  758  }

↓ open down ↓

758 lines elided

↑ open up ↑

 759  759  
 760  760  /*ARGSUSED*/
 761  761  static int
 762  762  segkmem_capable(struct seg *seg, segcapability_t capability)
 763  763  {
                   /*
                    * Capability query for segkmem segments: answers 1 for
                    * S_CAPABILITY_NOMINFLT and 0 for every other capability.
                    * The seg argument is not consulted.
                    */
 764  764          if (capability == S_CAPABILITY_NOMINFLT)
 765  765                  return (1);
 766  766          return (0);
 767  767  }
 768  768  
 769      -static struct seg_ops segkmem_ops = {
      769 +static const struct seg_ops segkmem_ops = {
                   /*
                    * Segment operations vector installed by segkmem_create() and
                    * segkmem_zio_create().  Entries filled in with SEGKMEM_BADOP()
                    * have no dedicated segkmem_* handler in this table.
                    */
 770  770          .dup            = SEGKMEM_BADOP(int),
 771  771          .unmap          = SEGKMEM_BADOP(int),
 772  772          .free           = SEGKMEM_BADOP(void),
 773  773          .fault          = segkmem_fault,
 774  774          .faulta         = SEGKMEM_BADOP(faultcode_t),
 775  775          .setprot        = segkmem_setprot,
 776  776          .checkprot      = segkmem_checkprot,
 777  777          .kluster        = segkmem_kluster,
 778  778          .swapout        = SEGKMEM_BADOP(size_t),
 779  779          .sync           = SEGKMEM_BADOP(int),

 780  780          .incore         = SEGKMEM_BADOP(size_t),
 781  781          .lockop         = SEGKMEM_BADOP(int),
 782  782          .getprot        = SEGKMEM_BADOP(int),
 783  783          .getoffset      = SEGKMEM_BADOP(u_offset_t),
 784  784          .gettype        = SEGKMEM_BADOP(int),
 785  785          .getvp          = SEGKMEM_BADOP(int),
 786  786          .advise         = SEGKMEM_BADOP(int),
 787  787          .dump           = segkmem_dump,
 788  788          .pagelock       = segkmem_pagelock,
 789  789          .setpagesize    = SEGKMEM_BADOP(int),
 790  790          .getmemid       = segkmem_getmemid,
 791  791          .capable        = segkmem_capable,
 792  792  };
 793  793  
 794  794  int
 795  795  segkmem_zio_create(struct seg *seg)
 796  796  {
                   /*
                    * Turn seg into a segkmem segment whose private data is the zvp
                    * vnode, and account for its size in kas.a_size.  The caller
                    * must hold kas.a_lock as writer.  Always returns 0.
                    */
 797  797          ASSERT(seg->s_as == &kas && RW_WRITE_HELD(&kas.a_lock));
 798  798          seg->s_ops = &segkmem_ops;
 799  799          seg->s_data = &zvp;
 800  800          kas.a_size += seg->s_size;
 801  801          return (0);
 802  802  }
 803  803  
 804  804  int
 805  805  segkmem_create(struct seg *seg)
 806  806  {
                   /*
                    * Turn seg into a segkmem segment whose private data is the kvp
                    * vnode, and account for its size in kas.a_size.  The caller
                    * must hold kas.a_lock as writer.  Always returns 0.
                    */
 807  807          ASSERT(seg->s_as == &kas && RW_WRITE_HELD(&kas.a_lock));
 808  808          seg->s_ops = &segkmem_ops;
 809  809          seg->s_data = &kvp;
 810  810          kas.a_size += seg->s_size;
 811  811          return (0);
 812  812  }
 813  813  
 814  814  /*ARGSUSED*/
 815  815  page_t *
 816  816  segkmem_page_create(void *addr, size_t size, int vmflag, void *arg)
 817  817  {
                   /*
                    * Page-creation callback: creates the pages for
                    * [addr, addr + size) via page_create_va(), using the vnode
                    * passed in arg (kvp when arg is NULL) and addr as the offset.
                    * The vmem flags in vmflag are translated to PG_* flags below.
                    */
 818  818          struct seg kseg;
 819  819          int pgflags;
 820  820          struct vnode *vp = arg;
 821  821  
 822  822          if (vp == NULL)
 823  823                  vp = &kvp;
 824  824  
                   /* only s_as of this on-stack seg is filled in before use */
 825  825          kseg.s_as = &kas;
 826  826          pgflags = PG_EXCL;
 827  827  
 828  828          if (segkmem_reloc == 0 || (vmflag & VM_NORELOC))
 829  829                  pgflags |= PG_NORELOC;
 830  830          if ((vmflag & VM_NOSLEEP) == 0)
 831  831                  pgflags |= PG_WAIT;
 832  832          if (vmflag & VM_PANIC)
 833  833                  pgflags |= PG_PANIC;
 834  834          if (vmflag & VM_PUSHPAGE)
 835  835                  pgflags |= PG_PUSHPAGE;
 836  836          if (vmflag & VM_NORMALPRI) {
 837  837                  ASSERT(vmflag & VM_NOSLEEP);
 838  838                  pgflags |= PG_NORMALPRI;
 839  839          }
 840  840  
 841  841          return (page_create_va(vp, (u_offset_t)(uintptr_t)addr, size,
 842  842              pgflags, &kseg, addr));
 843  843  }
 844  844  
 845  845  /*
 846  846   * Allocate pages to back the virtual address range [addr, addr + size).
 847  847   * If addr is NULL, allocate the virtual address space as well.
            *
            * Pages are obtained from page_create_func(addr, size, vmflag, pcarg)
            * after btopr(size) pages have been reserved with page_resv().  Each
            * page is then loaded into the kernel hat with HAT_LOAD_LOCK, using
            * attr as extra hat attributes, and has p_lckcnt set to 1.
            *
            * Returns the mapped address, or NULL if the address allocation, the
            * reservation or the page creation fails; on failure anything this
            * function obtained itself is released again.
 848  848   */
 849  849  void *
 850  850  segkmem_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag, uint_t attr,
 851  851          page_t *(*page_create_func)(void *, size_t, int, void *), void *pcarg)
 852  852  {
 853  853          page_t *ppl;
 854  854          caddr_t addr = inaddr;
 855  855          pgcnt_t npages = btopr(size);
 856  856          int allocflag;
 857  857  
 858  858          if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
 859  859                  return (NULL);
 860  860  
 861  861          ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
 862  862  
 863  863          if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
                           /* only give back address space we allocated ourselves */
 864  864                  if (inaddr == NULL)
 865  865                          vmem_free(vmp, addr, size);
 866  866                  return (NULL);
 867  867          }
 868  868  
 869  869          ppl = page_create_func(addr, size, vmflag, pcarg);
 870  870          if (ppl == NULL) {
 871  871                  if (inaddr == NULL)
 872  872                          vmem_free(vmp, addr, size);
 873  873                  page_unresv(npages);
 874  874                  return (NULL);
 875  875          }
 876  876  
 877  877          /*
 878  878           * Under certain conditions, we need to let the HAT layer know
 879  879           * that it cannot safely allocate memory.  Allocations from
 880  880           * the hat_memload vmem arena always need this, to prevent
 881  881           * infinite recursion.
 882  882           *
 883  883           * In addition, the x86 hat cannot safely do memory
 884  884           * allocations while in vmem_populate(), because there
 885  885           * is no simple bound on its usage.
 886  886           */
 887  887          if (vmflag & VM_MEMLOAD)
 888  888                  allocflag = HAT_NO_KALLOC;
 889  889  #if defined(__x86)
 890  890          else if (vmem_is_populator())
 891  891                  allocflag = HAT_NO_KALLOC;
 892  892  #endif
 893  893          else
 894  894                  allocflag = 0;
 895  895  
                   /* take the pages off the list one by one and map each of them */
 896  896          while (ppl != NULL) {
 897  897                  page_t *pp = ppl;
 898  898                  page_sub(&ppl, pp);
 899  899                  ASSERT(page_iolock_assert(pp));
 900  900                  ASSERT(PAGE_EXCL(pp));
 901  901                  page_io_unlock(pp);
 902  902                  hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset, pp,
 903  903                      (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
 904  904                      HAT_LOAD_LOCK | allocflag);
 905  905                  pp->p_lckcnt = 1;
                           /*
                            * x86 always downgrades the page lock instead of
                            * dropping it; other platforms do so only when
                            * SEGKMEM_SHARELOCKED is requested.
                            */
 906  906  #if defined(__x86)
 907  907                  page_downgrade(pp);
 908  908  #else
 909  909                  if (vmflag & SEGKMEM_SHARELOCKED)
 910  910                          page_downgrade(pp);
 911  911                  else
 912  912                          page_unlock(pp);
 913  913  #endif
 914  914          }
 915  915  
 916  916          return (addr);
 917  917  }
 918  918  
 919  919  static void *
 920  920  segkmem_alloc_vn(vmem_t *vmp, size_t size, int vmflag, struct vnode *vp)
 921  921  {
                   /*
                    * Allocate size bytes of address space from vmp backed by pages
                    * of vnode vp.  While kvseg has no base address yet, memory is
                    * instead recycled from segkmem_gc_list or taken from
                    * boot_alloc(); afterwards the request is passed on to
                    * segkmem_xalloc() with segkmem_page_create as page creator.
                    */
 922  922          void *addr;
 923  923          segkmem_gc_list_t *gcp, **prev_gcpp;
 924  924  
 925  925          ASSERT(vp != NULL);
 926  926  
 927  927          if (kvseg.s_base == NULL) {
 928  928  #ifndef __sparc
 929  929                  if (bootops->bsys_alloc == NULL)
 930  930                          halt("Memory allocation between bop_alloc() and "
 931  931                              "kmem_alloc().\n");
 932  932  #endif
 933  933  
 934  934                  /*
 935  935                   * There's not a lot of memory to go around during boot,
 936  936                   * so recycle it if we can.
 937  937                   */
 938  938                  for (prev_gcpp = &segkmem_gc_list; (gcp = *prev_gcpp) != NULL;
 939  939                      prev_gcpp = &gcp->gc_next) {
                                   /* reuse only an exact arena and size match */
 940  940                          if (gcp->gc_arena == vmp && gcp->gc_size == size) {
 941  941                                  *prev_gcpp = gcp->gc_next;
 942  942                                  return (gcp);
 943  943                          }
 944  944                  }
 945  945  
 946  946                  addr = vmem_alloc(vmp, size, vmflag | VM_PANIC);
 947  947                  if (boot_alloc(addr, size, BO_NO_ALIGN) != addr)
 948  948                          panic("segkmem_alloc: boot_alloc failed");
 949  949                  return (addr);
 950  950          }
 951  951          return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
 952  952              segkmem_page_create, vp));
 953  953  }
 954  954  
 955  955  void *
 956  956  segkmem_alloc(vmem_t *vmp, size_t size, int vmflag)
 957  957  {
                   /* segkmem_alloc_vn() with the pages backed by the kvp vnode */
 958  958          return (segkmem_alloc_vn(vmp, size, vmflag, &kvp));
 959  959  }
 960  960  
 961  961  void *
 962  962  segkmem_zio_alloc(vmem_t *vmp, size_t size, int vmflag)
 963  963  {
                   /* segkmem_alloc_vn() with the pages backed by the zvp vnode */
 964  964          return (segkmem_alloc_vn(vmp, size, vmflag, &zvp));
 965  965  }
 966  966  
 967  967  /*
 968  968   * Any changes to this routine must also be carried over to
 969  969   * devmap_free_pages() in the seg_dev driver. This is because
 970  970   * we currently don't have a special kernel segment for non-paged
 971  971   * kernel memory that is exported by drivers to user space.
            *
            * Unmaps [inaddr, inaddr + size) and disposes of each backing page of
            * vnode vp: through func when one is supplied, otherwise with
            * page_destroy().  The page reservation is returned only when func is
            * NULL, and the address range is given back to vmp only when vmp is
            * not NULL.  While kvseg has no base address yet, the memory is just
            * queued on segkmem_gc_list for segkmem_gc() to free later.
 972  972   */
 973  973  static void
 974  974  segkmem_free_vn(vmem_t *vmp, void *inaddr, size_t size, struct vnode *vp,
 975  975      void (*func)(page_t *))
 976  976  {
 977  977          page_t *pp;
 978  978          caddr_t addr = inaddr;
 979  979          caddr_t eaddr;
 980  980          pgcnt_t npages = btopr(size);
 981  981  
 982  982          ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
 983  983          ASSERT(vp != NULL);
 984  984  
 985  985          if (kvseg.s_base == NULL) {
                           /* the freed memory itself holds the list node */
 986  986                  segkmem_gc_list_t *gc = inaddr;
 987  987                  gc->gc_arena = vmp;
 988  988                  gc->gc_size = size;
 989  989                  gc->gc_next = segkmem_gc_list;
 990  990                  segkmem_gc_list = gc;
 991  991                  return;
 992  992          }
 993  993  
 994  994          hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
 995  995  
 996  996          for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
 997  997  #if defined(__x86)
 998  998                  pp = page_find(vp, (u_offset_t)(uintptr_t)addr);
 999  999                  if (pp == NULL)
1000 1000                          panic("segkmem_free: page not found");
1001 1001                  if (!page_tryupgrade(pp)) {
1002 1002                          /*
1003 1003                           * Some other thread has a sharelock. Wait for
1004 1004                           * it to drop the lock so we can free this page.
1005 1005                           */
1006 1006                          page_unlock(pp);
1007 1007                          pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr,
1008 1008                              SE_EXCL);
1009 1009                  }
1010 1010  #else
1011 1011                  pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
1012 1012  #endif
1013 1013                  if (pp == NULL)
1014 1014                          panic("segkmem_free: page not found");
1015 1015                  /* Clear p_lckcnt so page_destroy() doesn't update availrmem */
1016 1016                  pp->p_lckcnt = 0;
1017 1017                  if (func)
1018 1018                          func(pp);
1019 1019                  else
1020 1020                          page_destroy(pp, 0);
1021 1021          }
1022 1022          if (func == NULL)
1023 1023                  page_unresv(npages);
1024 1024  
1025 1025          if (vmp != NULL)
1026 1026                  vmem_free(vmp, inaddr, size);
1027 1027  
1028 1028  }
1029 1029  
1030 1030  void
1031 1031  segkmem_xfree(vmem_t *vmp, void *inaddr, size_t size, void (*func)(page_t *))
1032 1032  {
                   /* free kvp-backed memory, handing each page to func if non-NULL */
1033 1033          segkmem_free_vn(vmp, inaddr, size, &kvp, func);
1034 1034  }
1035 1035  
1036 1036  void
1037 1037  segkmem_free(vmem_t *vmp, void *inaddr, size_t size)
1038 1038  {
                   /* free kvp-backed memory; no per-page callback is supplied */
1039 1039          segkmem_free_vn(vmp, inaddr, size, &kvp, NULL);
1040 1040  }
1041 1041  
1042 1042  void
1043 1043  segkmem_zio_free(vmem_t *vmp, void *inaddr, size_t size)
1044 1044  {
                   /* free zvp-backed memory; no per-page callback is supplied */
1045 1045          segkmem_free_vn(vmp, inaddr, size, &zvp, NULL);
1046 1046  }
1047 1047  
1048 1048  void
1049 1049  segkmem_gc(void)
1050 1050  {
                   /*
                    * Drain segkmem_gc_list, passing every queued chunk to
                    * segkmem_free() with the arena and size recorded in it.
                    * Expects kvseg to have a base address by now.
                    */
1051 1051          ASSERT(kvseg.s_base != NULL);
1052 1052          while (segkmem_gc_list != NULL) {
1053 1053                  segkmem_gc_list_t *gc = segkmem_gc_list;
                           /* unlink first: the node lives in the memory being freed */
1054 1054                  segkmem_gc_list = gc->gc_next;
1055 1055                  segkmem_free(gc->gc_arena, gc, gc->gc_size);
1056 1056          }
1057 1057  }
1058 1058  
1059 1059  /*
1060 1060   * Legacy entry points from here to end of file.
1061 1061   */
1062 1062  void
1063 1063  segkmem_mapin(struct seg *seg, void *addr, size_t size, uint_t vprot,
1064 1064      pfn_t pfn, uint_t flags)
1065 1065  {
                   /*
                    * Replace whatever is mapped at [addr, addr + size) in seg's
                    * address space with a locked device mapping that starts at
                    * pfn, using protections vprot and the caller's load flags.
                    */
1066 1066          hat_unload(seg->s_as->a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1067 1067          hat_devload(seg->s_as->a_hat, addr, size, pfn, vprot,
1068 1068              flags | HAT_LOAD_LOCK);
1069 1069  }
1070 1070  
1071 1071  void
1072 1072  segkmem_mapout(struct seg *seg, void *addr, size_t size)
1073 1073  {
                   /* unload and unlock the mappings for [addr, addr + size) */
1074 1074          hat_unload(seg->s_as->a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1075 1075  }
1076 1076  
1077 1077  void *
1078 1078  kmem_getpages(pgcnt_t npages, int kmflag)
1079 1079  {
                   /* thin wrapper: kmem_alloc() of npages pages' worth of bytes */
1080 1080          return (kmem_alloc(ptob(npages), kmflag));
1081 1081  }
1082 1082  
1083 1083  void
1084 1084  kmem_freepages(void *addr, pgcnt_t npages)
1085 1085  {
                   /* thin wrapper: kmem_free() of npages pages' worth of bytes */
1086 1086          kmem_free(addr, ptob(npages));
1087 1087  }
1088 1088  
1089 1089  /*
1090 1090   * segkmem_page_create_large() allocates a large page to be used for the kmem
1091 1091   * caches. If kpr is enabled we ask for a relocatable page unless requested
1092 1092   * otherwise. If kpr is disabled we have to ask for a non-reloc page
            *
            * The page is created on the kvp vnode against kvseg, with addr used
            * as the offset; arg is handed through to page_create_va_large()
            * unchanged.
1093 1093   */
1094 1094  static page_t *
1095 1095  segkmem_page_create_large(void *addr, size_t size, int vmflag, void *arg)
1096 1096  {
1097 1097          int pgflags;
1098 1098  
1099 1099          pgflags = PG_EXCL;
1100 1100  
                   /* translate the vmem flags into page creation flags */
1101 1101          if (segkmem_reloc == 0 || (vmflag & VM_NORELOC))
1102 1102                  pgflags |= PG_NORELOC;
1103 1103          if (!(vmflag & VM_NOSLEEP))
1104 1104                  pgflags |= PG_WAIT;
1105 1105          if (vmflag & VM_PUSHPAGE)
1106 1106                  pgflags |= PG_PUSHPAGE;
1107 1107          if (vmflag & VM_NORMALPRI)
1108 1108                  pgflags |= PG_NORMALPRI;
1109 1109  
1110 1110          return (page_create_va_large(&kvp, (u_offset_t)(uintptr_t)addr, size,
1111 1111              pgflags, &kvseg, addr, arg));
1112 1112  }
1113 1113  
1114 1114  /*
1115 1115   * Allocate a large page to back the virtual address range
1116 1116   * [addr, addr + size).  If addr is NULL, allocate the virtual address
1117 1117   * space as well.
            *
            * VM_NOSLEEP is forced onto vmflag.  One large page of segkmem_lpsize
            * bytes is created through page_create_func for each
            * segkmem_lpsize-sized piece of the range, and each is then mapped
            * locked with hat_memload_array().  Returns the mapped address, or
            * NULL after undoing the steps already taken if the reservation, the
            * scratch array, the address space or any page cannot be obtained.
1118 1118   */
1119 1119  static void *
1120 1120  segkmem_xalloc_lp(vmem_t *vmp, void *inaddr, size_t size, int vmflag,
1121 1121      uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *),
1122 1122      void *pcarg)
1123 1123  {
1124 1124          caddr_t addr = inaddr, pa;
1125 1125          size_t  lpsize = segkmem_lpsize;
1126 1126          pgcnt_t npages = btopr(size);
1127 1127          pgcnt_t nbpages = btop(lpsize);
1128 1128          pgcnt_t nlpages = size >> segkmem_lpshift;
1129 1129          size_t  ppasize = nbpages * sizeof (page_t *);
1130 1130          page_t *pp, *rootpp, **ppa, *pplist = NULL;
1131 1131          int i;
1132 1132  
1133 1133          vmflag |= VM_NOSLEEP;
1134 1134  
1135 1135          if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
1136 1136                  return (NULL);
1137 1137          }
1138 1138  
1139 1139          /*
1140 1140           * allocate an array we need for hat_memload_array.
1141 1141           * we use a separate arena to avoid recursion.
1142 1142           * we will not need this array when hat_memload_array learns pp++
1143 1143           */
1144 1144          if ((ppa = vmem_alloc(segkmem_ppa_arena, ppasize, vmflag)) == NULL) {
1145 1145                  goto fail_array_alloc;
1146 1146          }
1147 1147  
1148 1148          if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
1149 1149                  goto fail_vmem_alloc;
1150 1150  
1151 1151          ASSERT(((uintptr_t)addr & (lpsize - 1)) == 0);
1152 1152  
1153 1153          /* create all the pages */
1154 1154          for (pa = addr, i = 0; i < nlpages; i++, pa += lpsize) {
1155 1155                  if ((pp = page_create_func(pa, lpsize, vmflag, pcarg)) == NULL)
1156 1156                          goto fail_page_create;
1157 1157                  page_list_concat(&pplist, &pp);
1158 1158          }
1159 1159  
1160 1160          /* at this point we have all the resource to complete the request */
1161 1161          while ((rootpp = pplist) != NULL) {
                           /* peel one large page's worth of pages off into ppa[] */
1162 1162                  for (i = 0; i < nbpages; i++) {
1163 1163                          ASSERT(pplist != NULL);
1164 1164                          pp = pplist;
1165 1165                          page_sub(&pplist, pp);
1166 1166                          ASSERT(page_iolock_assert(pp));
1167 1167                          page_io_unlock(pp);
1168 1168                          ppa[i] = pp;
1169 1169                  }
1170 1170                  /*
1171 1171                   * Load the locked entry. It's OK to preload the entry into the
1172 1172                   * TSB since we now support large mappings in the kernel TSB.
1173 1173                   */
1174 1174                  hat_memload_array(kas.a_hat,
1175 1175                      (caddr_t)(uintptr_t)rootpp->p_offset, lpsize,
1176 1176                      ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
1177 1177                      HAT_LOAD_LOCK);
1178 1178  
                           /* walk ppa[] backwards, marking and unlocking each page */
1179 1179                  for (--i; i >= 0; --i) {
1180 1180                          ppa[i]->p_lckcnt = 1;
1181 1181                          page_unlock(ppa[i]);
1182 1182                  }
1183 1183          }
1184 1184  
1185 1185          vmem_free(segkmem_ppa_arena, ppa, ppasize);
1186 1186          return (addr);
1187 1187  
           /*
            * Error unwinding: each label releases what was obtained before the
            * corresponding failure point and falls through to the next one.
            */
1188 1188  fail_page_create:
1189 1189          while ((rootpp = pplist) != NULL) {
1190 1190                  for (i = 0, pp = pplist; i < nbpages; i++, pp = pplist) {
1191 1191                          ASSERT(pp != NULL);
1192 1192                          page_sub(&pplist, pp);
1193 1193                          ASSERT(page_iolock_assert(pp));
1194 1194                          page_io_unlock(pp);
1195 1195                  }
1196 1196                  page_destroy_pages(rootpp);
1197 1197          }
1198 1198  
1199 1199          if (inaddr == NULL)
1200 1200                  vmem_free(vmp, addr, size);
1201 1201  
1202 1202  fail_vmem_alloc:
1203 1203          vmem_free(segkmem_ppa_arena, ppa, ppasize);
1204 1204  
1205 1205  fail_array_alloc:
1206 1206          page_unresv(npages);
1207 1207  
1208 1208          return (NULL);
1209 1209  }
1210 1210  
1211 1211  static void
1212 1212  segkmem_free_one_lp(caddr_t addr, size_t size)
1213 1213  {
                   /*
                    * Unmap and destroy the pages backing the single large page at
                    * addr; size is expected to equal segkmem_lpsize.  Each kvp
                    * page in the range is looked up SE_EXCL and has p_lckcnt
                    * cleared, then the first page found is handed to
                    * page_destroy_pages().
                    */
1214 1214          page_t          *pp, *rootpp = NULL;
1215 1215          pgcnt_t         pgs_left = btopr(size);
1216 1216  
1217 1217          ASSERT(size == segkmem_lpsize);
1218 1218  
1219 1219          hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1220 1220  
1221 1221          for (; pgs_left > 0; addr += PAGESIZE, pgs_left--) {
1222 1222                  pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
1223 1223                  if (pp == NULL)
1224 1224                          panic("segkmem_free_one_lp: page not found");
1225 1225                  ASSERT(PAGE_EXCL(pp));
1226 1226                  pp->p_lckcnt = 0;
                           /* remember the first page of the range as the root */
1227 1227                  if (rootpp == NULL)
1228 1228                          rootpp = pp;
1229 1229          }
1230 1230          ASSERT(rootpp != NULL);
1231 1231          page_destroy_pages(rootpp);
1232 1232  
1233 1233          /* page_unresv() is done by the caller */
1234 1234  }
1235 1235  
1236 1236  /*
1237 1237   * This function is called to import new spans into the vmem arenas like
1238 1238   * kmem_default_arena and kmem_oversize_arena. It first tries to import
1239 1239   * spans from large page arena - kmem_lp_arena. In order to do this it might
1240 1240   * have to "upgrade the requested size" to kmem_lp_arena quantum. If
1241 1241   * it was not able to satisfy the upgraded request it then calls regular
1242 1242   * segkmem_alloc() that satisfies the request by importing from "*vmp" arena
            *
            * On entry *sizep is the requested size.  When the large page arena
            * satisfies the request, *sizep is updated to the rounded-up size that
            * was actually allocated; on the segkmem_alloc() fallback it is left
            * untouched.  Large pages are not attempted when lp_uselp is clear,
            * when the current thread has T_PANIC set, or when the caller asked
            * for SEGKMEM_SHARELOCKED.
1243 1243   */
1244 1244  /*ARGSUSED*/
1245 1245  void *
1246 1246  segkmem_alloc_lp(vmem_t *vmp, size_t *sizep, size_t align, int vmflag)
1247 1247  {
1248 1248          size_t size;
1249 1249          kthread_t *t = curthread;
1250 1250          segkmem_lpcb_t *lpcb = &segkmem_lpcb;
1251 1251  
1252 1252          ASSERT(sizep != NULL);
1253 1253  
1254 1254          size = *sizep;
1255 1255  
1256 1256          if (lpcb->lp_uselp && !(t->t_flag & T_PANIC) &&
1257 1257              !(vmflag & SEGKMEM_SHARELOCKED)) {
1258 1258  
1259 1259                  size_t kmemlp_qnt = segkmem_kmemlp_quantum;
1260 1260                  size_t asize = P2ROUNDUP(size, kmemlp_qnt);
1261 1261                  void  *addr = NULL;
1262 1262                  ulong_t *lpthrtp = &lpcb->lp_throttle;
1263 1263                  ulong_t lpthrt = *lpthrtp;
1264 1264                  int     dowakeup = 0;
1265 1265                  int     doalloc = 1;
1266 1266  
1267 1267                  ASSERT(kmem_lp_arena != NULL);
1268 1268                  ASSERT(asize >= size);
1269 1269  
                           /* a non-zero throttle means large page throttling is on */
1270 1270                  if (lpthrt != 0) {
1271 1271                          /* try to update the throttle value */
1272 1272                          lpthrt = atomic_inc_ulong_nv(lpthrtp);
1273 1273                          if (lpthrt >= segkmem_lpthrottle_max) {
1274 1274                                  lpthrt = atomic_cas_ulong(lpthrtp, lpthrt,
1275 1275                                      segkmem_lpthrottle_max / 4);
1276 1276                          }
1277 1277  
1278 1278                          /*
1279 1279                           * when we get above throttle start do an exponential
1280 1280                           * backoff at trying large pages and reaping
1281 1281                           */
1282 1282                          if (lpthrt > segkmem_lpthrottle_start &&
1283 1283                              !ISP2(lpthrt)) {
1284 1284                                  lpcb->allocs_throttled++;
1285 1285                                  lpthrt--;
1286 1286                                  if (ISP2(lpthrt))
1287 1287                                          kmem_reap();
1288 1288                                  return (segkmem_alloc(vmp, size, vmflag));
1289 1289                          }
1290 1290                  }
1291 1291  
1292 1292                  if (!(vmflag & VM_NOSLEEP) &&
1293 1293                      segkmem_heaplp_quantum >= (8 * kmemlp_qnt) &&
1294 1294                      vmem_size(kmem_lp_arena, VMEM_FREE) <= kmemlp_qnt &&
1295 1295                      asize < (segkmem_heaplp_quantum - kmemlp_qnt)) {
1296 1296  
1297 1297                          /*
1298 1298                           * we are low on free memory in kmem_lp_arena
1299 1299                           * we let only one guy to allocate heap_lp
1300 1300                           * quantum size chunk that everybody is going to
1301 1301                           * share
1302 1302                           */
1303 1303                          mutex_enter(&lpcb->lp_lock);
1304 1304  
1305 1305                          if (lpcb->lp_wait) {
1306 1306  
1307 1307                                  /* we are not the first one - wait */
1308 1308                                  cv_wait(&lpcb->lp_cv, &lpcb->lp_lock);
1309 1309                                  if (vmem_size(kmem_lp_arena, VMEM_FREE) <
1310 1310                                      kmemlp_qnt)  {
1311 1311                                          doalloc = 0;
1312 1312                                  }
1313 1313                          } else if (vmem_size(kmem_lp_arena, VMEM_FREE) <=
1314 1314                              kmemlp_qnt) {
1315 1315  
1316 1316                                  /*
1317 1317                                   * we are the first one, make sure we import
1318 1318                                   * a large page
1319 1319                                   */
1320 1320                                  if (asize == kmemlp_qnt)
1321 1321                                          asize += kmemlp_qnt;
1322 1322                                  dowakeup = 1;
1323 1323                                  lpcb->lp_wait = 1;
1324 1324                          }
1325 1325  
1326 1326                          mutex_exit(&lpcb->lp_lock);
1327 1327                  }
1328 1328  
1329 1329                  /*
1330 1330                   * VM_ABORT flag prevents sleeps in vmem_xalloc when
1331 1331                   * large pages are not available. In that case this allocation
1332 1332                   * attempt will fail and we will retry allocation with small
1333 1333                   * pages. We also do not want to panic if this allocation fails
1334 1334                   * because we are going to retry.
1335 1335                   */
1336 1336                  if (doalloc) {
1337 1337                          addr = vmem_alloc(kmem_lp_arena, asize,
1338 1338                              (vmflag | VM_ABORT) & ~VM_PANIC);
1339 1339  
                                   /* the importer releases waiters either way */
1340 1340                          if (dowakeup) {
1341 1341                                  mutex_enter(&lpcb->lp_lock);
1342 1342                                  ASSERT(lpcb->lp_wait != 0);
1343 1343                                  lpcb->lp_wait = 0;
1344 1344                                  cv_broadcast(&lpcb->lp_cv);
1345 1345                                  mutex_exit(&lpcb->lp_lock);
1346 1346                          }
1347 1347                  }
1348 1348  
1349 1349                  if (addr != NULL) {
                                   /* success: report the real size, reset throttle */
1350 1350                          *sizep = asize;
1351 1351                          *lpthrtp = 0;
1352 1352                          return (addr);
1353 1353                  }
1354 1354  
1355 1355                  if (vmflag & VM_NOSLEEP)
1356 1356                          lpcb->nosleep_allocs_failed++;
1357 1357                  else
1358 1358                          lpcb->sleep_allocs_failed++;
1359 1359                  lpcb->alloc_bytes_failed += size;
1360 1360  
1361 1361                  /* if large page throttling is not started yet do it */
1362 1362                  if (segkmem_use_lpthrottle && lpthrt == 0) {
1363 1363                          lpthrt = atomic_cas_ulong(lpthrtp, lpthrt, 1);
1364 1364                  }
1365 1365          }
1366 1366          return (segkmem_alloc(vmp, size, vmflag));
1367 1367  }
1368 1368  
1369 1369  void
1370 1370  segkmem_free_lp(vmem_t *vmp, void *inaddr, size_t size)
1371 1371  {
                   /*
                    * Free memory obtained from segkmem_alloc_lp(): addresses for
                    * which IS_KMEM_VA_LARGEPAGE() holds go back to kmem_lp_arena,
                    * everything else (or everything, when kmem_lp_arena does not
                    * exist) is released through segkmem_free().
                    */
1372 1372          if (kmem_lp_arena == NULL || !IS_KMEM_VA_LARGEPAGE((caddr_t)inaddr)) {
1373 1373                  segkmem_free(vmp, inaddr, size);
1374 1374          } else {
1375 1375                  vmem_free(kmem_lp_arena, inaddr, size);
1376 1376          }
1377 1377  }
1378 1378  
1379 1379  /*
1380 1380   * segkmem_alloc_lpi() imports virtual memory from large page heap arena
1381 1381   * into kmem_lp arena. In the process it maps the imported segment with
1382 1382   * large pages
            *
            * Returns NULL, and counts the refusal in allocs_limited, once the
            * amount already allocated from vmp has reached segkmem_kmemlp_max;
            * otherwise returns whatever segkmem_xalloc_lp() returns.
1383 1383   */
1384 1384  static void *
1385 1385  segkmem_alloc_lpi(vmem_t *vmp, size_t size, int vmflag)
1386 1386  {
1387 1387          segkmem_lpcb_t *lpcb = &segkmem_lpcb;
1388 1388          void  *addr;
1389 1389  
1390 1390          ASSERT(size != 0);
1391 1391          ASSERT(vmp == heap_lp_arena);
1392 1392  
1393 1393          /* do not allow large page heap grow beyond limits */
1394 1394          if (vmem_size(vmp, VMEM_ALLOC) >= segkmem_kmemlp_max) {
1395 1395                  lpcb->allocs_limited++;
1396 1396                  return (NULL);
1397 1397          }
1398 1398  
1399 1399          addr = segkmem_xalloc_lp(vmp, NULL, size, vmflag, 0,
1400 1400              segkmem_page_create_large, NULL);
1401 1401          return (addr);
1402 1402  }
1403 1403  
1404 1404  /*
1405 1405   * segkmem_free_lpi() returns virtual memory back into large page heap arena
1406 1406   * from kmem_lp arena. Beore doing this it unmaps the segment and frees
1407 1407   * large pages used to map it.
1408 1408   */
1409 1409  static void
1410 1410  segkmem_free_lpi(vmem_t *vmp, void *inaddr, size_t size)
1411 1411  {
1412 1412          pgcnt_t         nlpages = size >> segkmem_lpshift;
1413 1413          size_t          lpsize = segkmem_lpsize;
1414 1414          caddr_t         addr = inaddr;
1415 1415          pgcnt_t         npages = btopr(size);
1416 1416          int             i;
1417 1417  
1418 1418          ASSERT(vmp == heap_lp_arena);
1419 1419          ASSERT(IS_KMEM_VA_LARGEPAGE(addr));
1420 1420          ASSERT(((uintptr_t)inaddr & (lpsize - 1)) == 0);
1421 1421  
1422 1422          for (i = 0; i < nlpages; i++) {
1423 1423                  segkmem_free_one_lp(addr, lpsize);
1424 1424                  addr += lpsize;
1425 1425          }
1426 1426  
1427 1427          page_unresv(npages);
1428 1428  
1429 1429          vmem_free(vmp, inaddr, size);
1430 1430  }
1431 1431  
/*
 * This function is called at system boot time by kmem_init right after
 * /etc/system file has been read. It checks based on hardware configuration
 * and /etc/system settings if system is going to use large pages. The
 * initialization necessary to actually start using large pages
 * happens later in the process after segkmem_heap_lp_init() is called.
 *
 * Returns 1 if large pages are going to be used for the kernel heap and 0
 * otherwise.  The large page setup logic is only compiled for __sparc; on
 * every other build the function unconditionally returns 0.
 */
int
segkmem_lpsetup(void)
{
        int use_large_pages = 0;

#ifdef __sparc

        size_t memtotal = physmem * PAGESIZE;

        /* no virtual space was set aside for the large page heap */
        if (heap_lp_base == NULL) {
                segkmem_lpsize = PAGESIZE;
                return (0);
        }

        /* get a platform dependent value of large page size for kernel heap */
        segkmem_lpsize = get_segkmem_lpsize(segkmem_lpsize);

        if (segkmem_lpsize <= PAGESIZE) {
                /*
                 * put virtual space reserved for the large page kernel
                 * back to the regular heap
                 */
                vmem_xfree(heap_arena, heap_lp_base,
                    heap_lp_end - heap_lp_base);
                heap_lp_base = NULL;
                heap_lp_end = NULL;
                segkmem_lpsize = PAGESIZE;
                return (0);
        }

        /*
         * set heap_lp quantum if necessary: it must be a non-zero power
         * of two that is a multiple of the large page size
         */
        if (segkmem_heaplp_quantum == 0 || !ISP2(segkmem_heaplp_quantum) ||
            P2PHASE(segkmem_heaplp_quantum, segkmem_lpsize)) {
                segkmem_heaplp_quantum = segkmem_lpsize;
        }

        /*
         * set kmem_lp quantum if necessary: it must be a non-zero power
         * of two no larger than the heap_lp quantum
         */
        if (segkmem_kmemlp_quantum == 0 || !ISP2(segkmem_kmemlp_quantum) ||
            segkmem_kmemlp_quantum > segkmem_heaplp_quantum) {
                segkmem_kmemlp_quantum = segkmem_heaplp_quantum;
        }

        /*
         * set total amount of memory allowed for large page kernel heap;
         * when no explicit maximum is given it is derived from a percentage
         * of physical memory, falling back to 12 percent if the percentage
         * is zero or above 100
         */
        if (segkmem_kmemlp_max == 0) {
                if (segkmem_kmemlp_pcnt == 0 || segkmem_kmemlp_pcnt > 100)
                        segkmem_kmemlp_pcnt = 12;
                segkmem_kmemlp_max = (memtotal * segkmem_kmemlp_pcnt) / 100;
        }
        segkmem_kmemlp_max = P2ROUNDUP(segkmem_kmemlp_max,
            segkmem_heaplp_quantum);

        /* fix lp kmem preallocation request if necessary */
        if (segkmem_kmemlp_min) {
                segkmem_kmemlp_min = P2ROUNDUP(segkmem_kmemlp_min,
                    segkmem_heaplp_quantum);
                if (segkmem_kmemlp_min > segkmem_kmemlp_max)
                        segkmem_kmemlp_min = segkmem_kmemlp_max;
        }

        use_large_pages = 1;
        segkmem_lpszc = page_szc(segkmem_lpsize);
        segkmem_lpshift = page_get_shift(segkmem_lpszc);

#endif
        return (use_large_pages);
}
1505 1505  
1506 1506  void
1507 1507  segkmem_zio_init(void *zio_mem_base, size_t zio_mem_size)
1508 1508  {
1509 1509          ASSERT(zio_mem_base != NULL);
1510 1510          ASSERT(zio_mem_size != 0);
1511 1511  
1512 1512          /*
1513 1513           * To reduce VA space fragmentation, we set up quantum caches for the
1514 1514           * smaller sizes;  we chose 32k because that translates to 128k VA
1515 1515           * slabs, which matches nicely with the common 128k zio_data bufs.
1516 1516           */
1517 1517          zio_arena = vmem_create("zfs_file_data", zio_mem_base, zio_mem_size,
1518 1518              PAGESIZE, NULL, NULL, NULL, 32 * 1024, VM_SLEEP);
1519 1519  
1520 1520          zio_alloc_arena = vmem_create("zfs_file_data_buf", NULL, 0, PAGESIZE,
1521 1521              segkmem_zio_alloc, segkmem_zio_free, zio_arena, 0, VM_SLEEP);
1522 1522  
1523 1523          ASSERT(zio_arena != NULL);
1524 1524          ASSERT(zio_alloc_arena != NULL);
1525 1525  }
1526 1526  
1527 1527  #ifdef __sparc
1528 1528  
1529 1529  
1530 1530  static void *
1531 1531  segkmem_alloc_ppa(vmem_t *vmp, size_t size, int vmflag)
1532 1532  {
1533 1533          size_t ppaquantum = btopr(segkmem_lpsize) * sizeof (page_t *);
1534 1534          void   *addr;
1535 1535  
1536 1536          if (ppaquantum <= PAGESIZE)
1537 1537                  return (segkmem_alloc(vmp, size, vmflag));
1538 1538  
1539 1539          ASSERT((size & (ppaquantum - 1)) == 0);
1540 1540  
1541 1541          addr = vmem_xalloc(vmp, size, ppaquantum, 0, 0, NULL, NULL, vmflag);
1542 1542          if (addr != NULL && segkmem_xalloc(vmp, addr, size, vmflag, 0,
1543 1543              segkmem_page_create, NULL) == NULL) {
1544 1544                  vmem_xfree(vmp, addr, size);
1545 1545                  addr = NULL;
1546 1546          }
1547 1547  
1548 1548          return (addr);
1549 1549  }
1550 1550  
1551 1551  static void
1552 1552  segkmem_free_ppa(vmem_t *vmp, void *addr, size_t size)
1553 1553  {
1554 1554          size_t ppaquantum = btopr(segkmem_lpsize) * sizeof (page_t *);
1555 1555  
1556 1556          ASSERT(addr != NULL);
1557 1557  
1558 1558          if (ppaquantum <= PAGESIZE) {
1559 1559                  segkmem_free(vmp, addr, size);
1560 1560          } else {
1561 1561                  segkmem_free(NULL, addr, size);
1562 1562                  vmem_xfree(vmp, addr, size);
1563 1563          }
1564 1564  }
1565 1565  
1566 1566  void
1567 1567  segkmem_heap_lp_init()
1568 1568  {
1569 1569          segkmem_lpcb_t *lpcb = &segkmem_lpcb;
1570 1570          size_t heap_lp_size = heap_lp_end - heap_lp_base;
1571 1571          size_t lpsize = segkmem_lpsize;
1572 1572          size_t ppaquantum;
1573 1573          void   *addr;
1574 1574  
1575 1575          if (segkmem_lpsize <= PAGESIZE) {
1576 1576                  ASSERT(heap_lp_base == NULL);
1577 1577                  ASSERT(heap_lp_end == NULL);
1578 1578                  return;
1579 1579          }
1580 1580  
1581 1581          ASSERT(segkmem_heaplp_quantum >= lpsize);
1582 1582          ASSERT((segkmem_heaplp_quantum & (lpsize - 1)) == 0);
1583 1583          ASSERT(lpcb->lp_uselp == 0);
1584 1584          ASSERT(heap_lp_base != NULL);
1585 1585          ASSERT(heap_lp_end != NULL);
1586 1586          ASSERT(heap_lp_base < heap_lp_end);
1587 1587          ASSERT(heap_lp_arena == NULL);
1588 1588          ASSERT(((uintptr_t)heap_lp_base & (lpsize - 1)) == 0);
1589 1589          ASSERT(((uintptr_t)heap_lp_end & (lpsize - 1)) == 0);
1590 1590  
1591 1591          /* create large page heap arena */
1592 1592          heap_lp_arena = vmem_create("heap_lp", heap_lp_base, heap_lp_size,
1593 1593              segkmem_heaplp_quantum, NULL, NULL, NULL, 0, VM_SLEEP);
1594 1594  
1595 1595          ASSERT(heap_lp_arena != NULL);
1596 1596  
1597 1597          /* This arena caches memory already mapped by large pages */
1598 1598          kmem_lp_arena = vmem_create("kmem_lp", NULL, 0, segkmem_kmemlp_quantum,
1599 1599              segkmem_alloc_lpi, segkmem_free_lpi, heap_lp_arena, 0, VM_SLEEP);
1600 1600  
1601 1601          ASSERT(kmem_lp_arena != NULL);
1602 1602  
1603 1603          mutex_init(&lpcb->lp_lock, NULL, MUTEX_DEFAULT, NULL);
1604 1604          cv_init(&lpcb->lp_cv, NULL, CV_DEFAULT, NULL);
1605 1605  
1606 1606          /*
1607 1607           * this arena is used for the array of page_t pointers necessary
1608 1608           * to call hat_mem_load_array
1609 1609           */
1610 1610          ppaquantum = btopr(lpsize) * sizeof (page_t *);
1611 1611          segkmem_ppa_arena = vmem_create("segkmem_ppa", NULL, 0, ppaquantum,
1612 1612              segkmem_alloc_ppa, segkmem_free_ppa, heap_arena, ppaquantum,
1613 1613              VM_SLEEP);
1614 1614  
1615 1615          ASSERT(segkmem_ppa_arena != NULL);
1616 1616  
1617 1617          /* prealloacate some memory for the lp kernel heap */
1618 1618          if (segkmem_kmemlp_min) {
1619 1619  
1620 1620                  ASSERT(P2PHASE(segkmem_kmemlp_min,
1621 1621                      segkmem_heaplp_quantum) == 0);
1622 1622  
1623 1623                  if ((addr = segkmem_alloc_lpi(heap_lp_arena,
1624 1624                      segkmem_kmemlp_min, VM_SLEEP)) != NULL) {
1625 1625  
1626 1626                          addr = vmem_add(kmem_lp_arena, addr,
1627 1627                              segkmem_kmemlp_min, VM_SLEEP);
1628 1628                          ASSERT(addr != NULL);
1629 1629                  }
1630 1630          }
1631 1631  
1632 1632          lpcb->lp_uselp = 1;
1633 1633  }
1634 1634  
1635 1635  #endif

↓ open down ↓

856 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX