Browse Source

Move the metadata gc on primary node to a separate process

Jordi Boggiano 5 years ago
parent
commit
bb8b205113

+ 21 - 5
src/Packagist/WebBundle/Command/DumpPackagesCommand.php

@@ -31,6 +31,7 @@ class DumpPackagesCommand extends ContainerAwareCommand
             ->setName('packagist:dump')
             ->setDefinition(array(
                 new InputOption('force', null, InputOption::VALUE_NONE, 'Force a dump of all packages'),
+                new InputOption('gc', null, InputOption::VALUE_NONE, 'Runs garbage collection of old files'),
             ))
             ->setDescription('Dumps the packages into a packages.json + included files')
         ;
@@ -41,8 +42,9 @@ class DumpPackagesCommand extends ContainerAwareCommand
      */
     protected function execute(InputInterface $input, OutputInterface $output)
     {
-        $force = (Boolean) $input->getOption('force');
-        $verbose = (Boolean) $input->getOption('verbose');
+        $force = (bool) $input->getOption('force');
+        $gc = (bool) $input->getOption('gc');
+        $verbose = (bool) $input->getOption('verbose');
 
         $deployLock = $this->getContainer()->getParameter('kernel.cache_dir').'/deploy.globallock';
         if (file_exists($deployLock)) {
@@ -53,8 +55,12 @@ class DumpPackagesCommand extends ContainerAwareCommand
         }
 
         // another dumper is still active
+        $lockName = $this->getName();
+        if ($gc) {
+            $lockName .= '-gc';
+        }
         $locker = $this->getContainer()->get('locker');
-        if (!$locker->lockCommand($this->getName())) {
+        if (!$locker->lockCommand($lockName)) {
             if ($verbose) {
                 $output->writeln('Aborting, another task is running already');
             }
@@ -62,6 +68,16 @@ class DumpPackagesCommand extends ContainerAwareCommand
         }
 
         $doctrine = $this->getContainer()->get('doctrine');
+        $dumper = $this->getContainer()->get('packagist.package_dumper');
+
+        if ($gc) {
+            try {
+                $dumper->gc();
+            } finally {
+                $locker->unlockCommand($lockName);
+            }
+            return 0;
+        }
 
         if ($force) {
             $packages = $doctrine->getManager()->getConnection()->fetchAll('SELECT id FROM package WHERE replacementPackage != "spam/spam" OR replacementPackage IS NULL ORDER BY id ASC');
@@ -84,9 +100,9 @@ class DumpPackagesCommand extends ContainerAwareCommand
         gc_enable();
 
         try {
-            $result = $this->getContainer()->get('packagist.package_dumper')->dump($ids, $force, $verbose);
+            $result = $dumper->dump($ids, $force, $verbose);
         } finally {
-            $locker->unlockCommand($this->getName());
+            $locker->unlockCommand($lockName);
         }
 
         return $result ? 0 : 1;

+ 29 - 17
src/Packagist/WebBundle/Package/SymlinkDumper.php

@@ -154,8 +154,6 @@ class SymlinkDumper
             throw new \RuntimeException('Metadata store not mounted, can not dump metadata');
         }
 
-        $cleanUpOldFiles = date('i') == 0;
-
         // prepare build dir
         $webDir = $this->webDir;
 
@@ -321,7 +319,6 @@ class SymlinkDumper
             if ($verbose) {
                 echo 'Preparing individual files listings'.PHP_EOL;
             }
-            $safeFiles = array();
             $individualHashedListings = array();
             $finder = Finder::create()->files()->ignoreVCS(true)->name('*.json')->in($buildDir)->depth('1');
 
@@ -340,7 +337,6 @@ class SymlinkDumper
                 $listing = $this->getTargetListing($file);
                 $hash = hash_file('sha256', $file);
                 $key = substr($key, 0, -5);
-                $safeFiles[] = $key.'$'.$hash.'.json';
                 $this->listings[$listing]['providers'][$key] = array('sha256' => $hash);
                 $individualHashedListings[$listing] = true;
             }
@@ -365,7 +361,6 @@ class SymlinkDumper
                 list($listingPath, $hash) = $this->dumpListing($buildDir.'/'.$listing);
                 $hashedListing = basename($listingPath);
                 $this->rootFile['provider-includes']['p/'.str_replace($hash, '%hash%', $hashedListing)] = array('sha256' => $hash);
-                $safeFiles[] = $hashedListing;
             }
 
             if ($verbose) {
@@ -458,15 +453,6 @@ class SymlinkDumper
             $this->copyWriteLog($buildDir, $oldBuildDir);
         }
 
-        // clean up old files once an hour
-        if (!$force && $cleanUpOldFiles) {
-            if ($verbose) {
-                echo 'Cleaning up old files'.PHP_EOL;
-            }
-
-            $this->cleanOldFiles($buildDir, $oldBuildDir, $safeFiles);
-        }
-
         if ($verbose) {
             echo 'Updating package dump times'.PHP_EOL;
         }
@@ -511,7 +497,6 @@ class SymlinkDumper
 
         $this->statsd->increment('packagist.metadata_dump');
 
-        // TODO when a package is deleted, it should be removed from provider files, or marked for removal at least
         return true;
     }
 
@@ -543,6 +528,33 @@ class SymlinkDumper
         }
     }
 
+    public function gc()
+    {
+        // build up array of safe files
+        $safeFiles = [];
+
+        $rootFile = $this->webDir.'/packages.json';
+        if (!file_exists($rootFile) || !is_dir($this->buildDir.'/a')) {
+            return;
+        }
+        $rootJson = json_decode(file_get_contents($rootFile), true);
+        foreach ($rootJson['provider-includes'] as $listing => $opts) {
+            $listing = str_replace('%hash%', $opts['sha256'], $listing);
+            $safeFiles[basename($listing)] = true;
+
+            $listingJson = json_decode(file_get_contents($this->webDir.'/'.$listing), true);
+            foreach ($listingJson['providers'] as $pkg => $opts) {
+                $provPath = $pkg.'$'.$opts['sha256'].'.json';
+                $safeFiles[$provPath] = true;
+            }
+        }
+
+        $buildDirs = [realpath($this->buildDir.'/a'), realpath($this->buildDir.'/b')];
+        shuffle($buildDirs);
+
+        $this->cleanOldFiles($buildDirs[0], $buildDirs[1], $safeFiles);
+    }
+
     private function cleanOldFiles($buildDir, $oldBuildDir, $safeFiles)
     {
         $time = (time() - 86400) * 10000;
@@ -559,7 +571,7 @@ class SymlinkDumper
 
             foreach ($vendorFiles as $file) {
                 $key = strtr(str_replace($buildDir.DIRECTORY_SEPARATOR, '', $file), '\\', '/');
-                if (!in_array($key, $safeFiles, true)) {
+                if (!isset($safeFiles[$key])) {
                     unlink((string) $file);
                     if (file_exists($altDirFile = str_replace($buildDir, $oldBuildDir, (string) $file))) {
                         unlink($altDirFile);
@@ -572,7 +584,7 @@ class SymlinkDumper
         $finder = Finder::create()->depth(0)->files()->name('provider-*.json')->ignoreVCS(true)->in($buildDir)->date('until 10minutes ago');
         foreach ($finder as $provider) {
             $key = strtr(str_replace($buildDir.DIRECTORY_SEPARATOR, '', $provider), '\\', '/');
-            if (!in_array($key, $safeFiles, true)) {
+            if (!isset($safeFiles[$key])) {
                 $path = (string) $provider;
                 unlink($path);
                 if (file_exists($path.'.gz')) {