eZ Platform Discussions

Command which loops on a large amount of data


#1

Hello!

I have many scripts that have to deal with very large amounts of content (more than 10k).
To perform different operations:

  • Changing attributes.
  • Adding a Location.
  • Hide/make visible.

But my scripts take a very long time to run, and they get killed before they reach the end.

I’m looking for a way to optimize all this.

The script must meet the following needs:

  • It must browse the Content or Sites matching certain criteria.

    • Location,
    • ContentType,
    • Attribute value,
  • The contents may no longer meet the criteria once the operation is completed.
    Example: moving Locations from initial_import_flat to their correct place in the tree.

  • It must support a dry-run mode


#2

For the moment, this is what I’ve come up with:

<?php
/**
 * Console command that walks a (potentially very large) search result set
 * page by page and applies an operation to every hit.
 *
 * The result set is traversed from its END towards offset 0. Processing an
 * item may make it stop matching the search criteria (e.g. it is moved out of
 * the searched subtree); walking backwards means such removals only affect
 * positions that have already been visited, so no pending item is skipped.
 *
 * Options:
 *  - --dry-run / -D : flag stored in $dry_run for processLoopItem() to honour.
 *  - --break=N      : stop after N items have been processed (0 = no limit).
 *  - --offset=N     : ignore the last N hits of the result set, e.g. to resume
 *                     a dry-run that was interrupted after N items.
 */
class FooCommand extends ContainerAwareCommand
{
    private $dry_run = false;
    private $shouldStop = false;

    /** @var InputInterface */
    private $input;
    /** @var OutputInterface */
    private $output;

    /**
     * Declares the command name and its options.
     */
    protected function configure()
    {
        $this
            ->setName('my:foo')
            ->addOption('dry-run', 'D', InputOption::VALUE_NONE, "")
            ->addOption('break', '', InputOption::VALUE_OPTIONAL, "")
            ->addOption('offset', '', InputOption::VALUE_OPTIONAL, "offset")
        ;
    }

    /**
     * Entry point: reads the options, runs the processing loop and prints "END".
     *
     * @param InputInterface  $input
     * @param OutputInterface $output
     *
     * @return int Exit code (always 0).
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        // Store the I/O objects first so every other method can rely on them.
        $this->input = $input;
        $this->output = $output;

        $this->dry_run = (bool)$input->getOption('dry-run');

        // Missing options are null and therefore cast to 0 (= "disabled").
        $break  = (int)$input->getOption('break');
        $offset = (int)$input->getOption('offset');

        $this->processLoop($break, $offset);

        $output->writeln('END');

        return 0;
    }

    /**
     * Iterates over the whole result set, last page first, and hands every hit
     * to processLoopItem().
     *
     * The loop stops when the beginning of the result set is reached, when a
     * page comes back empty, when $break items have been processed, or when a
     * SIGINT/SIGTERM signal has been received.
     *
     * @param int $break       Maximum number of items to process; 0 disables the limit.
     * @param int $init_offset Number of hits to ignore at the end of the result set.
     */
    public function processLoop($break, $init_offset)
    {
        set_time_limit(0);

        $limit = 50;

        // A first one-hit query is only used to learn the total number of matches.
        $total = $this->getSearchResult(0, 1)->totalCount;
        $processed = 0;

        // The pcntl extension is optional (and absent on Windows); without it
        // the command still works, it just cannot be stopped gracefully.
        $canHandleSignals = function_exists('pcntl_signal');
        if ($canHandleSignals) {
            pcntl_signal(SIGTERM, [$this, 'stopCommand']);
            pcntl_signal(SIGINT, [$this, 'stopCommand']);
        }
        $this->shouldStop = false;

        // Exclusive upper bound of the next page to fetch. It moves towards 0.
        $windowEnd = $total - max(0, (int)$init_offset);

        while ($windowEnd > 0) {
            $offset = max(0, $windowEnd - $limit);

            // The page size is shrunk on the last page so that hits visited
            // by the previous page are never fetched (and processed) twice.
            $searchResult = $this->getSearchResult($offset, $windowEnd - $offset);
            $hits = $searchResult->searchHits;

            if (count($hits) === 0) {
                break;
            }

            // Walk the page backwards as well, for the same reason the pages
            // themselves are visited in reverse order.
            for ($i = count($hits) - 1; $i >= 0; $i--) {
                $processed++;

                $this->processLoopItem($hits[$i], $processed, $total);

                if ($break && $break <= $processed) {
                    $this->output->writeln("BREAK $break $processed");
                    break 2;
                }

                if ($canHandleSignals) {
                    // Runs the handlers registered with pcntl_signal() for
                    // every signal that is currently pending.
                    pcntl_signal_dispatch();
                }
                if ($this->shouldStop) {
                    $this->output->writeln("Ctrl+C");
                    break 2;
                }
            }

            $windowEnd = $offset;
        }

        $this->output->writeln( "END LOOP" );
    }

    /**
     * Signal handler: asks the processing loop to stop after the current item.
     *
     * It is invoked when the command receives a stop signal (SIGTERM/SIGINT),
     * but not when the process is killed (SIGKILL cannot be intercepted).
     *
     * ```
     * pcntl_signal(SIGTERM, [$this, 'stopCommand']);
     * pcntl_signal(SIGINT, [$this, 'stopCommand']);
     * ```
     */
    public function stopCommand()
    {
        $this->shouldStop = true;
    }

    /**
     * Applies the operation to a single search hit.
     *
     * @param SearchHit $hit The hit to process.
     * @param int       $ii  1-based rank of this hit within the current run.
     * @param int       $TT  Total number of matches counted when the run started.
     */
    public function processLoopItem(SearchHit $hit, $ii, $TT)
    {
        // Do what you have to do.
        // Placeholder: skip the write operations here when running in dry-run mode.
        if ($this->dry_run) {
        }
    }

    /**
     * Runs the content search for one page of results.
     *
     * @param int $offset Index of the first hit to return.
     * @param int $limit  Maximum number of hits to return.
     *
     * @return mixed The value returned by the search service's findContent();
     *               callers read its totalCount and searchHits properties.
     */
    public function getSearchResult($offset, $limit)
    {
        $query = new Query();

        $query->limit  = $limit;
        $query->offset = $offset;

        $and = [
            new Query\Criterion\ContentTypeIdentifier(['article', 'breve']),
        ];

        // ... additional "and" criteria go here ...

        $query->filter = new Query\Criterion\LogicalAnd($and);

        // NOTE(review): no sort clause is set, so the order of hits between two
        // paged queries presumably depends on the search engine; consider
        // setting $query->sortClauses to a stable order — TODO confirm.

        // NOTE(review): getRepository() is not defined in this class; it is
        // assumed to be provided elsewhere (parent class or trait) — verify.
        return $this->getRepository()->getSearchService()->findContent($query);
    }
}

How can this be improved?
Am I completely off track and there is a simpler solution?