Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.0.x will end 8 May 2023 (12 months).
  • Bug fixes for security issues in 4.0.x will end 13 November 2023 (18 months).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.

Differences Between: [Versions 310 and 400] [Versions 39 and 400]

   1  <?php
   2  
   3  namespace Box\Spout\Reader\XLSX\Manager\SharedStringsCaching;
   4  
   5  use Box\Spout\Reader\XLSX\Creator\HelperFactory;
   6  
   /**
    * Class CachingStrategyFactory
    *
    * Picks the caching strategy used for XLSX shared strings: an in-memory strategy when the
    * estimated memory footprint fits within PHP's "memory_limit", a file-based strategy otherwise.
    */
   class CachingStrategyFactory
   {
       /**
        * The memory amount needed to store a string was obtained empirically from this data:
        *
        *        ------------------------------------
        *        | Number of chars⁺ | Memory needed |
        *        ------------------------------------
        *        |           3,000  |         1 MB  |
        *        |          15,000  |         2 MB  |
        *        |          30,000  |         5 MB  |
        *        |          75,000  |        11 MB  |
        *        |         150,000  |        21 MB  |
        *        |         300,000  |        43 MB  |
        *        |         750,000  |       105 MB  |
        *        |       1,500,000  |       210 MB  |
        *        |       2,250,000  |       315 MB  |
        *        |       3,000,000  |       420 MB  |
        *        |       4,500,000  |       630 MB  |
        *        ------------------------------------
        *
        *        ⁺ All characters were 1 byte long
        *
        * This gives a linear graph where each 1-byte character requires about 150 bytes to be stored.
        * Given that some characters can take up to 4 bytes, we need 600 bytes per character to be safe.
        * Also, there are on average about 20 characters per cell (this is entirely empirical data...).
        *
        * This means that in order to store one shared string in memory, the memory amount needed is:
        *   => 20 * 600 ≈ 12KB
        */
       const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12;

       /**
        * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files
        * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory
        * and the string will be quickly retrieved.
        * The performance bottleneck is not when creating these temporary files, but rather when loading their content.
        * Because the contents of the last loaded file stay in memory until another file needs to be loaded, it works
        * best when the indexes of the shared strings are sorted in the sheet data.
        * 10,000 was chosen because it creates small files that are fast to be loaded in memory.
        */
       const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;

       /**
        * Returns the best caching strategy, given the number of unique shared strings
        * and the amount of memory available.
        *
        * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
        * @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored
        * @param HelperFactory $helperFactory Factory to create helpers
        * @return CachingStrategyInterface The best caching strategy
        */
       public function createBestCachingStrategy($sharedStringsUniqueCount, $tempFolder, $helperFactory)
       {
           if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) {
               return new InMemoryStrategy($sharedStringsUniqueCount);
           }

           return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE, $helperFactory);
       }

       /**
        * Returns whether it is safe to use in-memory caching, given the number of unique shared strings
        * and the amount of memory available.
        *
        * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
        * @return bool
        */
       protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)
       {
           // If the number of shared strings is unknown, do not use the "in memory" strategy.
           if ($sharedStringsUniqueCount === null) {
               return false;
           }

           $memoryAvailable = $this->getMemoryLimitInKB();

           if ($memoryAvailable === -1) {
               // The limit could not be determined or is unlimited: don't trust it and play safe by
               // only allowing in-memory caching for a small number of strings.
               return $sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE;
           }

           $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB;

           return $memoryAvailable > $memoryNeeded;
       }

       /**
        * Returns the PHP "memory_limit" in Kilobytes.
        *
        * Accepts a plain number of bytes ("134217728") as well as a number followed by a unit
        * letter (b, k, m, g, t), optionally followed by "b" ("128M", "128mb"). Matching is
        * case-insensitive and ignores surrounding whitespace.
        *
        * @return int|float Limit in KB, or -1 if the limit is unlimited or cannot be parsed
        */
       protected function getMemoryLimitInKB()
       {
           // The string cast guards against a non-string value (e.g. FALSE) coming back from the ini lookup.
           $memoryLimitFormatted = \strtolower(\trim((string) $this->getMemoryLimitFromIni()));

           // No memory limit
           if ($memoryLimitFormatted === '-1') {
               return -1;
           }

           // Anchored so that only a well-formed "<digits>[unit][b]" value is accepted. The unit is
           // optional because a value without any suffix is a number of bytes.
           if (\preg_match('/^(\d+)([bkmgt]?)b?$/', $memoryLimitFormatted, $matches) !== 1) {
               return -1;
           }

           $amount = (int) $matches[1];

           switch ($matches[2]) {
               case 'k':
                   return $amount;
               case 'm':
                   return $amount * 1024;
               case 'g':
                   return $amount * 1024 * 1024;
               case 't':
                   return $amount * 1024 * 1024 * 1024;
               default:
                   // "b" or no unit at all: the amount is expressed in bytes
                   return $amount / 1024;
           }
       }

       /**
        * Returns the formatted "memory_limit" value, as reported by ini_get().
        * Kept as a separate method so that subclasses can override where the value comes from.
        *
        * @return string
        */
       protected function getMemoryLimitFromIni()
       {
           return \ini_get('memory_limit');
       }
   }