Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0 Note: minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is supported too.
   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace Phpml\Preprocessing;
   6  
   7  use Phpml\Exception\InvalidArgumentException;
   8  
   9  final class OneHotEncoder implements Preprocessor
  10  {
  11      /**
  12       * @var bool
  13       */
  14      private $ignoreUnknown;
  15  
  16      /**
  17       * @var array
  18       */
  19      private $categories = [];
  20  
  21      public function __construct(bool $ignoreUnknown = false)
  22      {
  23          $this->ignoreUnknown = $ignoreUnknown;
  24      }
  25  
  26      public function fit(array $samples, ?array $targets = null): void
  27      {
  28          foreach (array_keys(array_values(current($samples))) as $column) {
  29              $this->fitColumn($column, array_values(array_unique(array_column($samples, $column))));
  30          }
  31      }
  32  
  33      public function transform(array &$samples, ?array &$targets = null): void
  34      {
  35          foreach ($samples as &$sample) {
  36              $sample = $this->transformSample(array_values($sample));
  37          }
  38      }
  39  
  40      private function fitColumn(int $column, array $values): void
  41      {
  42          $count = count($values);
  43          foreach ($values as $index => $value) {
  44              $map = array_fill(0, $count, 0);
  45              $map[$index] = 1;
  46              $this->categories[$column][$value] = $map;
  47          }
  48      }
  49  
  50      private function transformSample(array $sample): array
  51      {
  52          $encoded = [];
  53          foreach ($sample as $column => $feature) {
  54              if (!isset($this->categories[$column][$feature]) && !$this->ignoreUnknown) {
  55                  throw new InvalidArgumentException(sprintf('Missing category "%s" for column %s in trained encoder', $feature, $column));
  56              }
  57  
  58              $encoded = array_merge(
  59                  $encoded,
  60                  $this->categories[$column][$feature] ?? array_fill(0, count($this->categories[$column]), 0)
  61              );
  62          }
  63  
  64          return $encoded;
  65      }
  66  }