Browse Source

Make indexer work in batches

Jordi Boggiano 13 năm trước
mục cha
commit
d9effa6b77

+ 29 - 18
src/Packagist/WebBundle/Command/IndexPackagesCommand.php

@@ -53,13 +53,18 @@ class IndexPackagesCommand extends ContainerAwareCommand
         $solarium = $this->getContainer()->get('solarium.client');
 
         if ($package) {
-            $packages = array($doctrine->getRepository('PackagistWebBundle:Package')->findOneByName($package));
+            $packages = array(array('id' => $doctrine->getRepository('PackagistWebBundle:Package')->findOneByName($package)->getId()));
         } elseif ($force) {
-            $packages = $doctrine->getRepository('PackagistWebBundle:Package')->findAll();
+            $packages = $doctrine->getEntityManager()->getConnection()->fetchAll('SELECT id FROM package ORDER BY id ASC');
         } else {
             $packages = $doctrine->getRepository('PackagistWebBundle:Package')->getStalePackagesForIndexing();
         }
 
+        $ids = array();
+        foreach ($packages as $package) {
+            $ids[] = $package['id'];
+        }
+
         // clear index before a full-update
         if ($force && !$package) {
             if ($verbose) {
@@ -74,25 +79,31 @@ class IndexPackagesCommand extends ContainerAwareCommand
         }
 
         // update package index
-        foreach ($packages as $package) {
-            if ($verbose) {
-                $output->writeln('Indexing '.$package->getName());
+        while ($ids) {
+            $packages = $doctrine->getRepository('PackagistWebBundle:Package')->getFullPackages(array_splice($ids, 0, 50));
+
+            foreach ($packages as $package) {
+                if ($verbose) {
+                    $output->writeln('Indexing '.$package->getName());
+                }
+
+                try {
+                    $update = $solarium->createUpdate();
+                    $document = $update->createDocument();
+                    $this->updateDocumentFromPackage($document, $package);
+                    $update->addDocument($document);
+                    $update->addCommit();
+                    $solarium->update($update);
+                    $package->setIndexedAt(new \DateTime);
+                } catch (\Exception $e) {
+                    $output->writeln('<error>Exception: '.$e->getMessage().', skipping package '.$package->getName().'.</error>');
+                }
             }
 
-            try {
-                $update = $solarium->createUpdate();
-                $document = $update->createDocument();
-                $this->updateDocumentFromPackage($document, $package);
-                $update->addDocument($document);
-                $update->addCommit();
-                $solarium->update($update);
-                $package->setIndexedAt(new \DateTime);
-            } catch (\Exception $e) {
-                $output->writeln('<error>Exception: '.$e->getMessage().', skipping package '.$package->getName().'.</error>');
-            }
+            $doctrine->getEntityManager()->flush();
+            $doctrine->getEntityManager()->clear();
+            unset($packages);
         }
-
-        $doctrine->getEntityManager()->flush();
     }
 
     private function updateDocumentFromPackage(\Solarium_Document_ReadWrite $document, Package $package)

+ 2 - 14
src/Packagist/WebBundle/Entity/PackageRepository.php

@@ -83,21 +83,9 @@ class PackageRepository extends EntityRepository
 
     public function getStalePackagesForIndexing()
     {
-        $qb = $this->getEntityManager()->createQueryBuilder();
-        $qb->select('p', 'v', 't', 'a', 'req', 'devReq', 'sug', 'rep', 'con', 'pro')
-            ->from('Packagist\WebBundle\Entity\Package', 'p')
-            ->leftJoin('p.versions', 'v')
-            ->leftJoin('v.tags', 't')
-            ->leftJoin('v.authors', 'a')
-            ->leftJoin('v.require', 'req')
-            ->leftJoin('v.devRequire', 'devReq')
-            ->leftJoin('v.suggest', 'sug')
-            ->leftJoin('v.replace', 'rep')
-            ->leftJoin('v.conflict', 'con')
-            ->leftJoin('v.provide', 'pro')
-            ->where('p.indexedAt IS NULL OR p.indexedAt < p.crawledAt');
+        $conn = $this->getEntityManager()->getConnection();
 
-        return $qb->getQuery()->getResult();
+        return $conn->fetchAll('SELECT p.id FROM package p WHERE p.indexedAt IS NULL OR p.indexedAt < p.crawledAt ORDER BY p.id ASC');
     }
 
     public function getStalePackagesForDumping()