Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.2.x will end 22 April 2024 (12 months).
  • Bug fixes for security issues in 4.2.x will end 7 October 2024 (18 months).
  • PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.1.x is supported too.

Differences Between: [Versions 402 and 403]

   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace OpenSpout\Reader\XLSX\Manager\SharedStringsCaching;
   6  
   7  /**
   8   * @internal
   9   */
  10  final class CachingStrategyFactory
  11  {
  12      /**
  13       * The memory amount needed to store a string was obtained empirically from this data:.
  14       *
  15       *        ------------------------------------
  16       *        | Number of chars⁺ | Memory needed |
  17       *        ------------------------------------
  18       *        |           3,000  |         1 MB  |
  19       *        |          15,000  |         2 MB  |
  20       *        |          30,000  |         5 MB  |
  21       *        |          75,000  |        11 MB  |
  22       *        |         150,000  |        21 MB  |
  23       *        |         300,000  |        43 MB  |
  24       *        |         750,000  |       105 MB  |
  25       *        |       1,500,000  |       210 MB  |
  26       *        |       2,250,000  |       315 MB  |
  27       *        |       3,000,000  |       420 MB  |
  28       *        |       4,500,000  |       630 MB  |
  29       *        ------------------------------------
  30       *
  31       *        ⁺ All characters were 1 byte long
  32       *
  33       * This gives a linear graph where each 1-byte character requires about 150 bytes to be stored.
  34       * Given that some characters can take up to 4 bytes, we need 600 bytes per character to be safe.
  35       * Also, there is on average about 20 characters per cell (this is entirely empirical data...).
  36       *
  37       * This means that in order to store one shared string in memory, the memory amount needed is:
  38       *   => 20 * 600 ≈ 12KB
  39       */
  40      public const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12;
  41  
  42      /**
  43       * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files
  44       * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory
  45       * and the string will be quickly retrieved.
  46       * The performance bottleneck is not when creating these temporary files, but rather when loading their content.
  47       * Because the contents of the last loaded file stays in memory until another file needs to be loaded, it works
  48       * best when the indexes of the shared strings are sorted in the sheet data.
  49       * 10,000 was chosen because it creates small files that are fast to be loaded in memory.
  50       */
  51      public const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;
  52  
  53      private MemoryLimit $memoryLimit;
  54  
  55      public function __construct(MemoryLimit $memoryLimit)
  56      {
  57          $this->memoryLimit = $memoryLimit;
  58      }
  59  
  60      /**
  61       * Returns the best caching strategy, given the number of unique shared strings
  62       * and the amount of memory available.
  63       *
  64       * @param null|int $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
  65       * @param string   $tempFolder               Temporary folder where the temporary files to store shared strings will be stored
  66       *
  67       * @return CachingStrategyInterface The best caching strategy
  68       */
  69      public function createBestCachingStrategy(?int $sharedStringsUniqueCount, string $tempFolder): CachingStrategyInterface
  70      {
  71          if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) {
  72              return new InMemoryStrategy($sharedStringsUniqueCount);
  73          }
  74  
  75          return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE);
  76      }
  77  
  78      /**
  79       * Returns whether it is safe to use in-memory caching, given the number of unique shared strings
  80       * and the amount of memory available.
  81       *
  82       * @param null|int $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
  83       */
  84      private function isInMemoryStrategyUsageSafe(?int $sharedStringsUniqueCount): bool
  85      {
  86          // if the number of shared strings in unknown, do not use "in memory" strategy
  87          if (null === $sharedStringsUniqueCount) {
  88              return false;
  89          }
  90  
  91          $memoryAvailable = $this->memoryLimit->getMemoryLimitInKB();
  92  
  93          if (-1 === (int) $memoryAvailable) {
  94              // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe
  95              $isInMemoryStrategyUsageSafe = ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE);
  96          } else {
  97              $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB;
  98              $isInMemoryStrategyUsageSafe = ($memoryAvailable > $memoryNeeded);
  99          }
 100  
 101          return $isInMemoryStrategyUsageSafe;
 102      }
 103  }