eZ Platform Discussions

Reindex the entire content tree in Solr, except for a particular ContentType


#1
<?php

namespace Smile\ToolsBundle\Command;

use eZ\Bundle\EzPublishCoreBundle\Command\ReindexCommand;
use eZ\Publish\API\Repository\Repository;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;

use eZ\Publish\SPI\Persistence\Content\ContentInfo;
use eZ\Publish\Core\Search\Common\Indexer;
use Doctrine\DBAL\Driver\Statement;
use Symfony\Component\Console\Helper\ProgressBar;
use RuntimeException;
use PDO;
/**
 * Class SmileReindexCommand
 *
 * Wraps <code>ezplatform:reindex</code> under the name <code>smile:tools:reindex</code> and adds two things:
 *  - the repository's current user is switched to the user whose id is {@see SmileReindexCommand::CREATOR_ID}
 *    before the parent command runs;
 *  - an <code>--exclude-content-types</code> option that refreshes the index for every published Content item
 *    except those belonging to the listed ContentTypes.
 *
 * @package Smile\ToolsBundle\Command
 */
class SmileReindexCommand extends ReindexCommand
{
    /**
     * Legacy storage engine database connection, fetched from the container in {@see initialize()}.
     *
     * @var \Doctrine\DBAL\Connection
     */
    protected $connection;

    /**
     * Search engine indexer, fetched from the container in {@see initialize()}.
     *
     * @var \eZ\Publish\Core\Search\Common\Indexer|\eZ\Publish\Core\Search\Common\IncrementalIndexer
     */
    protected $searchIndexer;

    /**
     * Id of the user the command impersonates.
     * NOTE(review): 14 is presumably the stock administrator account - confirm for this installation.
     */
    const CREATOR_ID = 14;

    /**
     * {@inheritdoc}
     *
     * Reuses the parent definition, then renames the command, prefixes its description/help and
     * registers the additional <code>--exclude-content-types</code> option.
     */
    protected function configure()
    {
        $txt = "Extend <info>ezplatform:reindex</info> using Admin user.";
        parent::configure();
        $this->setName('smile:tools:reindex');
        $this->setDescription($txt . ' > ' . $this->getDescription());
        $this->setHelp('<fg=blue>'.$txt.'</>' . PHP_EOL.PHP_EOL . $this->getHelp());
        $this->addOption(
            'exclude-content-types',
            null,
            InputOption::VALUE_OPTIONAL,
            'Comma-separated list of ContentType ids or identifiers to exclude from indexing'
        );
    }


    // =================================================================================================================
    // exclude-content-types

    /**
     * Refreshes the index incrementally, optionally skipping some ContentTypes.
     *
     * When <code>--exclude-content-types</code> is empty or absent the call is delegated untouched to the
     * parent implementation. Otherwise each comma-separated entry is resolved to a ContentType (numeric
     * entries by id, the others by identifier) and every published Content item of any other type is
     * re-indexed, without purging the index first.
     *
     * @param InputInterface $input
     * @param OutputInterface $output
     * @param int $iterationCount Number of Content items indexed per iteration.
     * @param bool $commit Forwarded to the indexer's updateSearchIndex() call.
     *
     * @return mixed Whatever the delegated implementation returns.
     *
     * @throws \eZ\Publish\API\Repository\Exceptions\NotFoundException
     * @throws \Exception When the option resolves to no ContentType at all.
     */
    protected function indexIncrementally(InputInterface $input, OutputInterface $output, $iterationCount, $commit)
    {
        $rawOption = $input->getOption('exclude-content-types');
        if (!$rawOption) {
            return parent::indexIncrementally($input, $output, $iterationCount, $commit);
        }

        // Split on commas, drop surrounding whitespace and empty entries.
        $requested = array_filter(array_map('trim', explode(',', $rawOption)));

        /** @var \eZ\Publish\SPI\Persistence\Content\Type\Handler $contentTypeHandler */
        $contentTypeHandler = $this->getContainer()->get('ezpublish.spi.persistence.content_type_handler');

        // Both maps are keyed by ContentType id so that a type listed twice is only kept once.
        $excludedIds = [];
        $excludedIdentifiers = [];
        foreach ($requested as $idOrIdentifier) {
            $contentType = is_numeric($idOrIdentifier)
                ? $contentTypeHandler->load($idOrIdentifier)
                : $contentTypeHandler->loadByIdentifier($idOrIdentifier);

            $excludedIds[$contentType->id] = (int) $contentType->id;
            $excludedIdentifiers[$contentType->id] = $contentType->identifier;
        }

        if (!$excludedIds) {
            throw new \Exception("exclude-content-types error");
        }

        $excludedIds = array_values($excludedIds);

        $output->writeln(sprintf(
            'Indexing exclude [%s]' . ($commit ? ', with commit' : ''),
            implode(', ', $excludedIdentifiers)
        ));

        $stmt = $this->getStatementExclude($excludedIds);
        $count = (int) $this->getStatementExclude($excludedIds, true)->fetchColumn();
        // The existing index is refreshed in place, never purged, in exclusion mode.
        $purge = false;

        return $this->indexIncrementallyProcess($input, $output, $iterationCount, $commit, $stmt, $count, $purge);
    }

    /**
     * Builds and executes the query selecting published Content ids that have a location and whose
     * ContentType is not in the given list.
     *
     * @param array $exclude_content_types ContentType ids to leave out; an empty list excludes nothing.
     * @param bool $count When true, the statement returns the number of matching ids instead of the ids.
     *
     * @return \Doctrine\DBAL\Driver\Statement
     */
    protected function getStatementExclude(array $exclude_content_types, $count = false)
    {
        $queryBuilder = $this->connection->createQueryBuilder();
        $queryBuilder
            ->select($count ? 'count(DISTINCT c.id)' : 'DISTINCT c.id')
            ->from('ezcontentobject', 'c')
            ->innerJoin('c', 'ezcontentobject_tree', 't', 't.contentobject_id = c.id')
            ->where('c.status = :status')
            ->setParameter('status', ContentInfo::STATUS_PUBLISHED, PDO::PARAM_INT);

        // notIn() splices the values straight into the SQL text, so force them to integers first.
        $excludedIds = array_values(array_unique(array_map('intval', $exclude_content_types)));
        if ($excludedIds) {
            // An empty "NOT IN ()" is invalid SQL, hence the clause is only added when needed.
            $queryBuilder->andWhere($queryBuilder->expr()->notIn('c.contentclass_id', $excludedIds));
        }

        return $queryBuilder->execute();
    }


    // =================================================================================================================
    // logAsSuperAdmin

    /**
     * Switches to the super admin user, then runs the parent command.
     *
     * @param InputInterface $input
     * @param OutputInterface $output
     *
     * @return int|null|void The parent command's return value (its exit code, if any).
     *
     * @throws \Exception
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $this->logAsSuperAdmin();

        // Propagate the parent's result so that the process exit code is not lost.
        return parent::execute($input, $output);
    }

    /**
     * Sets the repository's current user to the user whose id is {@see SmileReindexCommand::CREATOR_ID}.
     *
     * @throws \Exception
     */
    protected function logAsSuperAdmin()
    {
        /** @var Repository $repository */
        $repository = $this->getContainer()->get('ezpublish.api.repository');
        $superAdmin = $repository->getUserService()->loadUser(self::CREATOR_ID);
        $repository->setCurrentUser($superAdmin);
    }


    // =================================================================================================================
    // Code that would be better placed in the parent class.

    /**
     * Initialize objects required by {@see execute()}.
     *
     * Runs the parent initialization, then stores this class's own references to the search indexer and
     * to the legacy database connection.
     *
     * @param InputInterface $input
     * @param OutputInterface $output
     *
     * @throws RuntimeException When the indexer service is not a Search Engine Indexer.
     */
    public function initialize(InputInterface $input, OutputInterface $output)
    {
        parent::initialize($input, $output);
        $this->searchIndexer = $this->getContainer()->get('ezpublish.spi.search.indexer');
        $this->connection = $this->getContainer()->get('ezpublish.api.storage_engine.legacy.connection');

        if (!$this->searchIndexer instanceof Indexer) {
            // Report what was actually found: its class, or its type when it is not even an object.
            $found = is_object($this->searchIndexer)
                ? get_class($this->searchIndexer)
                : gettype($this->searchIndexer);

            throw new RuntimeException(
                sprintf('Expected to find Search Engine Indexer but found "%s" instead', $found)
            );
        }
    }

    /**
     * Reads the statement in batches of at most $iterationCount content ids.
     *
     * Re-implemented here because this class needs it from its own methods. Empty batches are never
     * yielded, so a result set whose size is a multiple of $iterationCount (or is zero) does not
     * produce a useless trailing iteration.
     *
     * @param \Doctrine\DBAL\Driver\Statement $stmt
     * @param int $iterationCount
     *
     * @return \Generator Yields arrays, each holding up to $iterationCount content ids.
     */
    protected function fetchIteration(Statement $stmt, $iterationCount)
    {
        if ($iterationCount < 1) {
            // Nothing can be batched; also prevents the loop below from spinning forever.
            return;
        }

        do {
            $contentIds = [];
            $exhausted = false;
            for ($i = 0; $i < $iterationCount; ++$i) {
                $contentId = $stmt->fetch(PDO::FETCH_COLUMN);
                if (!$contentId) {
                    $exhausted = true;
                    break;
                }
                $contentIds[] = $contentId;
            }

            if ($contentIds) {
                yield $contentIds;
            }
        } while (!$exhausted);
    }

    /**
     * Shared tail of indexIncrementally(): optional purge, progress reporting and the indexing loop.
     *
     * Indexing always happens inline in the current process.
     *
     * @param InputInterface $input Unused, kept for signature compatibility.
     * @param OutputInterface $output
     * @param int $iterationCount Number of Content items indexed per iteration, must be >= 1.
     * @param bool $commit Forwarded to the indexer's updateSearchIndex() call.
     * @param \Doctrine\DBAL\Driver\Statement $stmt Statement returning the content ids to index.
     * @param int $count Total number of content ids the statement returns.
     * @param bool $purge Whether the index is purged before indexing.
     *
     * @throws RuntimeException When $iterationCount is lower than 1.
     */
    protected function indexIncrementallyProcess(InputInterface $input, OutputInterface $output, $iterationCount, $commit, $stmt, $count, $purge)
    {
        if ((int) $iterationCount < 1) {
            throw new RuntimeException("'--iteration-count' option should be > 0, got '{$iterationCount}'");
        }

        // ProgressBar expects an integer number of steps; ceil() alone returns a float.
        $iterations = (int) ceil($count / $iterationCount);
        $processMessage = 'using single (current) process';

        if ($purge) {
            $output->writeln('Purging index...');
            $this->searchIndexer->purge();

            $output->writeln(
                "<info>Re-Creating index for {$count} items across $iterations iteration(s), $processMessage:</info>"
            );
        } else {
            $output->writeln(
                "<info>Refreshing index for {$count} items across $iterations iteration(s), $processMessage:</info>"
            );
        }

        $progress = new ProgressBar($output);
        $progress->start($iterations);

        foreach ($this->fetchIteration($stmt, $iterationCount) as $contentIds) {
            $this->searchIndexer->updateSearchIndex($contentIds, $commit);
            $progress->advance(1);
        }

        $progress->finish();
    }
}