Переглянути джерело

Simplify download count migration now that redis db has been cleaned up

Jordi Boggiano 6 роки тому
батько
коміт
c05e001a9b

+ 54 - 9
src/Packagist/WebBundle/Command/MigrateDownloadCountsCommand.php

@@ -40,18 +40,63 @@ class MigrateDownloadCountsCommand extends ContainerAwareCommand
         $packageRepo = $doctrine->getRepository(Package::class);
 
         try {
-            $packagesToProcess = $packageRepo->iterateStaleDownloadCountPackageIds();
-            foreach ($packagesToProcess as $packageDetails) {
-                $packageId = $packageDetails['id'];
-                $logger->debug('Processing package #'.$packageId);
-                $package = $packageRepo->findOneById($packageId);
-                $downloadManager->transferDownloadsToDb($package, $packageDetails['lastUpdated']);
+            // might be a large-ish dataset coming through here
+            ini_set('memory_limit', '1G');
 
-                $doctrine->getManager()->clear();
+            $redis = $this->getContainer()->get('snc_redis.default_client');
+            $now = new \DateTimeImmutable();
+            $todaySuffix = ':'.$now->format('Ymd');
+            $keysToUpdate = $redis->keys('dl:*:*');
 
-                if ($signal->isTriggered()) {
-                    break;
+            // skip today datapoints as we will store that to the DB tomorrow
+            $keysToUpdate = array_filter($keysToUpdate, function ($key) use ($todaySuffix) {
+                return strpos($key, $todaySuffix) === false;
+            });
+
+            // sort by package id, then package datapoint first followed by version datapoints
+            usort($keysToUpdate, function ($a, $b) {
+                $amin = preg_replace('{^(dl:\d+).*}', '$1', $a);
+                $bmin = preg_replace('{^(dl:\d+).*}', '$1', $b);
+
+                if ($amin !== $bmin) {
+                    return strcmp($amin, $bmin);
                 }
+
+                return strcmp($b, $a);
+            });
+
+            // buffer keys per package id and process all keys for a given package one by one
+            // to reduce SQL load
+            $buffer = [];
+            $lastPackageId = null;
+            while ($keysToUpdate) {
+                $key = array_shift($keysToUpdate);
+                if (!preg_match('{^dl:(\d+)}', $key, $m)) {
+                    $logger->error('Invalid dl key found: '.$key);
+                    continue;
+                }
+
+                $packageId = (int) $m[1];
+
+                if ($lastPackageId && $lastPackageId !== $packageId) {
+                    $logger->warning('Processing package #'.$lastPackageId);
+                    $downloadManager->transferDownloadsToDb($lastPackageId, $buffer, $now);
+                    $buffer = [];
+
+                    $doctrine->getManager()->clear();
+
+                    if ($signal->isTriggered()) {
+                        break;
+                    }
+                }
+
+                $buffer[] = $key;
+                $lastPackageId = $packageId;
+            }
+
+            // process last package
+            if ($buffer) {
+                $downloadManager->transferDownloadsToDb($lastPackageId, $buffer, $now);
             }
         } finally {
             $locker->unlockCommand($this->getName());

+ 39 - 71
src/Packagist/WebBundle/Model/DownloadManager.php

@@ -13,6 +13,7 @@
 namespace Packagist\WebBundle\Model;
 
 use Doctrine\Common\Persistence\ManagerRegistry;
+use Doctrine\DBAL\Connection;
 use Packagist\WebBundle\Entity\Package;
 use Packagist\WebBundle\Entity\Version;
 use Packagist\WebBundle\Entity\Download;
@@ -167,88 +168,71 @@ class DownloadManager
         $this->redis->downloadsIncr(...$args);
     }
 
-    public function transferDownloadsToDb(Package $package, DateTimeImmutable $lastUpdated)
+    public function transferDownloadsToDb(int $packageId, array $keys, DateTimeImmutable $now)
     {
-        // might be a large dataset coming through here especially on first run due to historical data
-        ini_set('memory_limit', '1G');
-
-        $packageId = $package->getId();
-        $rows = $this->doctrine->getManager()->getConnection()->fetchAll('SELECT id FROM package_version WHERE package_id = :id', ['id' => $packageId]);
-        $versionIds = [];
-        foreach ($rows as $row) {
-            $versionIds[] = $row['id'];
+        $package = $this->doctrine->getRepository(Package::class)->findOneById($packageId);
+        // package was deleted in the meantime, abort
+        if (!$package) {
+            $this->redis->del($keys);
+            return;
         }
 
-        $now = new DateTimeImmutable();
-        $keys = [];
-        $firstIteration = true;
-        while ($lastUpdated < $now) {
-            // TODO delete once the redis db has been purged
-            if ($firstIteration || $lastUpdated->format('d') === '01') {
-                $firstIteration = false;
-                // dl:$package:Ym
-                $keys[] = 'dl:'.$packageId.':'.$lastUpdated->format('Ym');
-                foreach ($versionIds as $id) {
-                    // dl:$package-$version and dl:$package-$version:Ym
-                    $keys[] = 'dl:'.$packageId.'-'.$id;
-                    $keys[] = 'dl:'.$packageId.'-'.$id.':'.$lastUpdated->format('Ym');
-                }
-            }
-
-            // dl:$package:Ymd
-            $keys[] = 'dl:'.$packageId.':'.$lastUpdated->format('Ymd');
-            foreach ($versionIds as $id) {
-                // dl:$package-$version:Ymd
-                $keys[] = 'dl:'.$packageId.'-'.$id.':'.$lastUpdated->format('Ymd');
+        $versionsWithDownloads = [];
+        foreach ($keys as $key) {
+            if (preg_match('{^dl:'.$packageId.'-(\d+):\d+$}', $key, $match)) {
+                $versionsWithDownloads[(int) $match[1]] = true;
             }
+        }
 
-            $lastUpdated = $lastUpdated->modify('+1day');
+        $rows = $this->doctrine->getManager()->getConnection()->fetchAll(
+            'SELECT id FROM package_version WHERE id IN (:ids)',
+            ['ids' => array_keys($versionsWithDownloads)],
+            ['ids' => Connection::PARAM_INT_ARRAY]
+        );
+        $versionIds = [];
+        foreach ($rows as $row) {
+            $versionIds[] = (int) $row['id'];
         }
+        unset($versionsWithDownloads, $rows, $row);
 
         sort($keys);
 
-        $buffer = [];
-        $toDelete = [];
-        $lastPrefix = '';
-
-        foreach ($keys as $key) {
-            // ignore IP keys temporarily until they all switch to throttle:* prefix
-            if (preg_match('{^dl:\d+:(\d+\.|[0-9a-f]+:[0-9a-f]+:)}', $key)) {
-                continue;
-            }
+        $values = $this->redis->mget($keys);
 
-            // delete version totals when we find one
-            if (preg_match('{^dl:\d+-\d+$}', $key)) {
-                $toDelete[] = $key;
-                continue;
-            }
+        $buffer = [];
+        $lastPrefix = null;
 
+        foreach ($keys as $index => $key) {
             $prefix = preg_replace('{:\d+$}', ':', $key);
 
             if ($lastPrefix && $prefix !== $lastPrefix && $buffer) {
-                $toDelete = $this->createDbRecordsForKeys($package, $buffer, $toDelete, $now);
+                $this->createDbRecordsForKeys($package, $buffer, $versionIds, $now);
                 $buffer = [];
             }
 
-            $buffer[] = $key;
+            $buffer[$key] = (int) $values[$index];
             $lastPrefix = $prefix;
         }
 
         if ($buffer) {
-            $toDelete = $this->createDbRecordsForKeys($package, $buffer, $toDelete, $now);
+            $this->createDbRecordsForKeys($package, $buffer, $versionIds, $now);
         }
 
         $this->doctrine->getManager()->flush();
 
-        while ($toDelete) {
-            $batch = array_splice($toDelete, 0, 1000);
-            $this->redis->del($batch);
-        }
+        $this->redis->del($keys);
     }
 
-    private function createDbRecordsForKeys(Package $package, array $keys, array $toDelete, DateTimeImmutable $now): array
+    private function createDbRecordsForKeys(Package $package, array $keys, array $validVersionIds, DateTimeImmutable $now)
     {
-        list($id, $type) = $this->getKeyInfo($keys[0]);
+        reset($keys);
+        list($id, $type) = $this->getKeyInfo(key($keys));
+
+        // skip if the version was deleted in the meantime
+        if ($type === Download::TYPE_VERSION && !in_array($id, $validVersionIds, true)) {
+            return;
+        }
+
         $record = $this->doctrine->getRepository(Download::class)->findOneBy(['id' => $id, 'type' => $type]);
         $isNewRecord = false;
         if (!$record) {
@@ -259,27 +243,13 @@ class DownloadManager
             $isNewRecord = true;
         }
 
-        $today = date('Ymd');
         $record->setLastUpdated($now);
 
-        $values = $this->redis->mget($keys);
-        foreach ($keys as $index => $key) {
+        foreach ($keys as $key => $val) {
             $date = preg_replace('{^.*?:(\d+)$}', '$1', $key);
-
-            // monthly data point, discard
-            if (strlen($date) === 6) {
-                $toDelete[] = $key;
-                continue;
-            }
-
-            $val = (int) $values[$index];
             if ($val) {
                 $record->setDataPoint($date, $val);
             }
-            // today's value is not deleted yet as it might not be complete and we want to update it when its complete
-            if ($date !== $today) {
-                $toDelete[] = $key;
-            }
         }
 
         // only store records for packages or for versions that have had downloads to avoid storing empty records
@@ -288,8 +258,6 @@ class DownloadManager
         }
 
         $record->computeSum();
-
-        return $toDelete;
     }
 
     private function getKeyInfo(string $key): array