<?php

namespace Drupal\taxonomy_overview;

use Wamania\Snowball\StemmerFactory;

/**
 * Normalizes term labels and groups labels that are near-duplicates.
 *
 * A label is lower-cased, split on whitespace, stemmed token by token and the
 * stems are sorted, so that case, word order and inflection do not affect the
 * resulting key. Keys are then clustered by Levenshtein edit distance.
 */
class TagsOverviewTermNormalizer {

  /**
   * Stemmer created by StemmerFactory::create(); must expose stem($word).
   *
   * @var object
   */
  protected $stemmer;

  /**
   * Constructs the normalizer with a stemmer for the given language.
   *
   * @param string $language
   *   Language code handed to StemmerFactory::create(). Defaults to 'en'.
   */
  public function __construct($language = 'en') {
    $this->stemmer = StemmerFactory::create($language);
  }

  /**
   * Builds the canonical comparison key for a term label.
   *
   * @param string $term
   *   The term label.
   *
   * @return string
   *   The sorted stems of the label joined by single spaces, or an empty
   *   string when the label contains no tokens.
   */
  public function normalize($term) {
    // Multibyte-aware lower-casing; strtolower() leaves non-ASCII capitals
    // untouched.
    $lowered = mb_strtolower((string) $term, 'UTF-8');

    // PREG_SPLIT_NO_EMPTY drops the empty tokens that leading or trailing
    // whitespace would otherwise produce.
    $tokens = preg_split('/\s+/u', $lowered, -1, PREG_SPLIT_NO_EMPTY);
    if ($tokens === FALSE) {
      // The /u modifier rejects invalid UTF-8; fall back to byte-wise
      // splitting rather than losing the term.
      $tokens = preg_split('/\s+/', $lowered, -1, PREG_SPLIT_NO_EMPTY) ?: [];
    }

    $stems = array_map(fn($token) => $this->stemmer->stem($token), $tokens);
    // SORT_STRING keeps the order canonical; the default SORT_REGULAR would
    // compare numeric-looking stems numerically.
    sort($stems, SORT_STRING);
    return implode(' ', $stems);
  }

  /**
   * Groups terms whose normalized keys are within a small edit distance.
   *
   * Terms are processed in the given order; each term joins the first existing
   * group whose key is close enough, otherwise it starts a new group keyed by
   * its own normalized key.
   *
   * Note: levenshtein() works on bytes, so a difference in one multibyte
   * character can count as more than one edit.
   *
   * @param array $terms
   *   Term labels keyed by term ID.
   * @param int $maxDistance
   *   Maximum Levenshtein distance between a group key and a term key for the
   *   term to join that group. Defaults to 2.
   *
   * @return array
   *   Groups keyed by the normalized key of their first term; each group is
   *   an array of original labels keyed by term ID.
   */
  public function groupSimilarTerms(array $terms, $maxDistance = 2) {
    $maxDistance = max(0, (int) $maxDistance);
    $groups = [];

    foreach ($terms as $tid => $original) {
      $base = $this->normalize($original);
      $target = $base;

      foreach (array_keys($groups) as $groupKey) {
        // PHP turns numeric-string array keys into integers; restore the
        // string before comparing.
        $groupKey = (string) $groupKey;

        // The byte-length difference is a lower bound on the edit distance,
        // so clearly different keys skip the levenshtein() call.
        if (abs(strlen($groupKey) - strlen($base)) > $maxDistance) {
          continue;
        }

        $distance = levenshtein($groupKey, $base);
        // Before PHP 8.0 levenshtein() returns -1 for arguments longer than
        // 255 bytes; that must not be treated as a match.
        if ($distance >= 0 && $distance <= $maxDistance) {
          $target = $groupKey;
          break;
        }
      }

      // Plain assignment (no by-reference iteration) so the returned array
      // carries no dangling references.
      $groups[$target][$tid] = $original;
    }

    return $groups;
  }

}
