Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.11.x will end 14 Nov 2022 (12 months plus 6 months extension).
  • Bug fixes for security issues in 3.11.x will end 13 Nov 2023 (18 months plus 12 months extension).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.

Differences Between: [Versions 311 and 400] [Versions 311 and 401] [Versions 311 and 402] [Versions 311 and 403]

   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace Phpml\FeatureExtraction;
   6  
   7  use Phpml\Transformer;
   8  
   9  class TfIdfTransformer implements Transformer
  10  {
  11      /**
  12       * @var array
  13       */
  14      private $idf = [];
  15  
  16      public function __construct(array $samples = [])
  17      {
  18          if (count($samples) > 0) {
  19              $this->fit($samples);
  20          }
  21      }
  22  
  23      public function fit(array $samples, ?array $targets = null): void
  24      {
  25          $this->countTokensFrequency($samples);
  26  
  27          $count = count($samples);
  28          foreach ($this->idf as &$value) {
  29              $value = log((float) ($count / $value), 10.0);
  30          }
  31      }
  32  
  33      public function transform(array &$samples): void
  34      {
  35          foreach ($samples as &$sample) {
  36              foreach ($sample as $index => &$feature) {
  37                  $feature *= $this->idf[$index];
  38              }
  39          }
  40      }
  41  
  42      private function countTokensFrequency(array $samples): void
  43      {
  44          $this->idf = array_fill_keys(array_keys($samples[0]), 0);
  45  
  46          foreach ($samples as $sample) {
  47              foreach ($sample as $index => $count) {
  48                  if ($count > 0) {
  49                      ++$this->idf[$index];
  50                  }
  51              }
  52          }
  53      }
  54  }