Browse Source

Automatically delete some github packages that are 404ed conclusively

Jordi Boggiano 7 years ago
parent
commit
0491b03885

+ 1 - 21
src/Packagist/WebBundle/Controller/PackageController.php

@@ -599,27 +599,7 @@ class PackageController extends Controller
         if ($form->isValid()) {
             $req->getSession()->save();
 
-            /** @var VersionRepository $versionRepo */
-            $versionRepo = $doctrine->getRepository('PackagistWebBundle:Version');
-            foreach ($package->getVersions() as $version) {
-                $versionRepo->remove($version);
-            }
-
-            $packageId = $package->getId();
-            $em = $doctrine->getManager();
-            $em->remove($package);
-            $em->flush();
-
-            $this->get('packagist.provider_manager')->deletePackage($package);
-
-            // attempt search index cleanup
-            try {
-                $indexName = $this->container->getParameter('algolia.index_name');
-                $algolia = $this->get('packagist.algolia.client');
-                $index = $algolia->initIndex($indexName);
-                $index->deleteObject($package->getName());
-            } catch (\AlgoliaSearch\AlgoliaException $e) {
-            }
+            $this->get('packagist.package_manager')->deletePackage($package);
 
             return new Response('', 204);
         }

+ 1 - 0
src/Packagist/WebBundle/Entity/Job.php

@@ -26,6 +26,7 @@ class Job
     const STATUS_STARTED = 'started';
     const STATUS_COMPLETED = 'completed';
     const STATUS_PACKAGE_GONE = 'package_gone';
+    const STATUS_PACKAGE_DELETED = 'package_deleted';
     const STATUS_FAILED = 'failed'; // failed in an expected/correct way
     const STATUS_ERRORED = 'errored'; // unexpected failure
     const STATUS_TIMEOUT = 'timeout'; // job was marked timed out

+ 34 - 1
src/Packagist/WebBundle/Model/PackageManager.php

@@ -15,6 +15,8 @@ namespace Packagist\WebBundle\Model;
 use Symfony\Bridge\Doctrine\RegistryInterface;
 use Packagist\WebBundle\Entity\Package;
 use Psr\Log\LoggerInterface;
+use AlgoliaSearch\Client as AlgoliaClient;
+
 
 /**
  * @author Jordi Boggiano <j.boggiano@seld.be>
@@ -26,14 +28,45 @@ class PackageManager
     protected $twig;
     protected $logger;
     protected $options;
+    protected $providerManager;
+    protected $algoliaClient;
+    protected $algoliaIndexName;
 
-    public function __construct(RegistryInterface $doctrine, \Swift_Mailer $mailer, \Twig_Environment $twig, LoggerInterface $logger, array $options)
+    public function __construct(RegistryInterface $doctrine, \Swift_Mailer $mailer, \Twig_Environment $twig, LoggerInterface $logger, array $options, ProviderManager $providerManager, AlgoliaClient $algoliaClient, string $algoliaIndexName)
     {
         $this->doctrine = $doctrine;
         $this->mailer = $mailer;
         $this->twig = $twig;
         $this->logger = $logger;
         $this->options = $options;
+        $this->providerManager = $providerManager;
+        $this->algoliaClient = $algoliaClient;
+        $this->algoliaIndexName  = $algoliaIndexName;
+    }
+
+    public function deletePackage(Package $package)
+    {
+        /** @var VersionRepository $versionRepo */
+        $versionRepo = $this->doctrine->getRepository('PackagistWebBundle:Version');
+        foreach ($package->getVersions() as $version) {
+            $versionRepo->remove($version);
+        }
+
+        $this->providerManager->deletePackage($package);
+        $packageName = $package->getName();
+
+        $em = $this->doctrine->getManager();
+        $em->remove($package);
+        $em->flush();
+
+        // attempt search index cleanup
+        try {
+            $indexName = $this->algoliaIndexName;
+            $algolia = $this->algoliaClient;
+            $index = $algolia->initIndex($indexName);
+            $index->deleteObject($packageName);
+        } catch (\AlgoliaSearch\AlgoliaException $e) {
+        }
     }
 
     public function notifyUpdateFailure(Package $package, \Exception $e, $details = null)

+ 3 - 1
src/Packagist/WebBundle/Package/Updater.php

@@ -93,7 +93,7 @@ class Updater
      * @param int $flags a few of the constants of this class
      * @param \DateTime $start
      */
-    public function update(IOInterface $io, Config $config, Package $package, RepositoryInterface $repository, $flags = 0, \DateTime $start = null)
+    public function update(IOInterface $io, Config $config, Package $package, RepositoryInterface $repository, $flags = 0, \DateTime $start = null): Package
     {
         $rfs = new RemoteFilesystem($io, $config);
 
@@ -245,6 +245,8 @@ class Updater
         if ($repository->hadInvalidBranches()) {
             throw new InvalidRepositoryException('Some branches contained invalid data and were discarded, it is advised to review the log and fix any issues present in branches');
         }
+
+        return $package;
     }
 
     /**

+ 4 - 1
src/Packagist/WebBundle/Resources/config/services.yml

@@ -108,6 +108,9 @@ services:
             - '@twig'
             - '@logger'
             - { from: '%mailer_from_email%', fromName: '%mailer_from_name%' }
+            - '@packagist.provider_manager'
+            - '@packagist.algolia.client'
+            - '%algolia.index_name%'
 
     packagist.profile.form.type:
         class: Packagist\WebBundle\Form\Type\ProfileFormType
@@ -154,7 +157,7 @@ services:
 
     updater_worker:
         class: Packagist\WebBundle\Service\UpdaterWorker
-        arguments: ["@logger", "@doctrine", "@packagist.package_updater", "@locker", "@scheduler", "@packagist.package_manager"]
+        arguments: ["@logger", "@doctrine", "@packagist.package_updater", "@locker", "@scheduler", "@packagist.package_manager", "@packagist.download_manager"]
 
     packagist.log_resetter:
         class: Packagist\WebBundle\Service\LogResetter

+ 1 - 1
src/Packagist/WebBundle/Resources/public/js/view.js

@@ -95,7 +95,7 @@
                             url: '/jobs/' + data.job,
                             cache: false,
                             success: function (data) {
-                                if (data.status == 'completed' || data.status == 'errored' || data.status == 'failed') {
+                                if (data.status == 'completed' || data.status == 'errored' || data.status == 'failed' || data.status == 'package_deleted') {
                                     humane.remove();
 
                                     var message = data.message;

+ 1 - 1
src/Packagist/WebBundle/Service/QueueWorker.php

@@ -150,7 +150,7 @@ class QueueWorker
             throw new \LogicException('$result must be an array with at least status and message keys');
         }
 
-        if (!in_array($result['status'], [Job::STATUS_COMPLETED, Job::STATUS_FAILED, Job::STATUS_ERRORED, Job::STATUS_PACKAGE_GONE], true)) {
+        if (!in_array($result['status'], [Job::STATUS_COMPLETED, Job::STATUS_FAILED, Job::STATUS_ERRORED, Job::STATUS_PACKAGE_GONE, Job::STATUS_PACKAGE_DELETED], true)) {
             throw new \LogicException('$result[\'status\'] must be one of '.Job::STATUS_COMPLETED.' or '.Job::STATUS_FAILED.', '.$result['status'].' given');
         }
 

+ 56 - 3
src/Packagist/WebBundle/Service/UpdaterWorker.php

@@ -20,9 +20,11 @@ use Packagist\WebBundle\Entity\Package;
 use Packagist\WebBundle\Package\Updater;
 use Packagist\WebBundle\Entity\Job;
 use Packagist\WebBundle\Model\PackageManager;
+use Packagist\WebBundle\Model\DownloadManager;
 use Seld\Signal\SignalHandler;
 use Composer\Factory;
 use Composer\Downloader\TransportException;
+use Composer\Util\RemoteFilesystem;
 
 class UpdaterWorker
 {
@@ -33,6 +35,7 @@ class UpdaterWorker
     /** @var Scheduler */
     private $scheduler;
     private $packageManager;
+    private $downloadManager;
 
     public function __construct(
         LoggerInterface $logger,
@@ -40,7 +43,8 @@ class UpdaterWorker
         Updater $updater,
         Locker $locker,
         Scheduler $scheduler,
-        PackageManager $packageManager
+        PackageManager $packageManager,
+        DownloadManager $downloadManager
     ) {
         $this->logger = $logger;
         $this->doctrine = $doctrine;
@@ -48,6 +52,7 @@ class UpdaterWorker
         $this->locker = $locker;
         $this->scheduler = $scheduler;
         $this->packageManager = $packageManager;
+        $this->downloadManager = $downloadManager;
     }
 
     public function process(Job $job, SignalHandler $signal): array
@@ -91,13 +96,23 @@ class UpdaterWorker
             $repository->setLoader($loader);
 
             // perform the actual update (fetch and re-scan the repository's source)
-            $this->updater->update($io, $config, $package, $repository, $flags);
+            $package = $this->updater->update($io, $config, $package, $repository, $flags);
+
+            // github update downgraded to a git clone, this should not happen, so check through API whether the package still exists
+            if (preg_match('{[@/]github.com[:/]([^/]+/[^/]+?)(\.git)?$}i', $package->getRepository(), $match) && 0 === strpos($repository->getDriver()->getUrl(), 'git@')) {
+                if ($result = $this->checkForDeadGitHubPackage($package, $match, $io, $io->getOutput())) {
+                    return $result;
+                }
+            }
         } catch (\Throwable $e) {
             $output = $io->getOutput();
 
             if (!$this->doctrine->getEntityManager()->isOpen()) {
                 $this->doctrine->resetManager();
                 $package = $this->doctrine->getEntityManager()->getRepository(Package::class)->findOneById($package->getId());
+            } else {
+                // reload the package just in case as Updater tends to merge it to a new instance
+                $package = $packageRepository->findOneById($id);
             }
 
             // invalid composer data somehow, notify the owner and then mark the job failed
@@ -134,6 +149,13 @@ class UpdaterWorker
                 $found404 = true;
             }
 
+            // github 404'ed, check through API whether the package still exists and delete if not
+            if ($found404 && preg_match('{[@/]github.com[:/]([^/]+/[^/]+?)(\.git)?$}i', $package->getRepository(), $match)) {
+                if ($result = $this->checkForDeadGitHubPackage($package, $match, $io, $output)) {
+                    return $result;
+                }
+            }
+
             // detected a 404 so mark the package as gone and prevent updates for 1y
             if ($found404) {
                 $package->setCrawledAt(new \DateTime('+1 year'));
@@ -170,7 +192,7 @@ class UpdaterWorker
             // unexpected error so mark the job errored
             throw $e;
         } finally {
-            $this->locker->unlockPackageUpdate($package->getId());
+            $this->locker->unlockPackageUpdate($id);
         }
 
         return [
@@ -179,4 +201,35 @@ class UpdaterWorker
             'details' => '<pre>'.$io->getOutput().'</pre>'
         ];
     }
+
+    private function checkForDeadGitHubPackage(Package $package, $match, $io, $output)
+    {
+        $rfs = new RemoteFilesystem($io);
+        try {
+            $rfs->getContents('github.com', 'https://api.github.com/repos/'.$match[1], false, ['retry-auth-failure' => false]);
+        } catch (\Throwable $e) {
+            if ($e instanceof TransportException && $e->getStatusCode() === 404) {
+                try {
+                    if (
+                        // check composer repo is visible to make sure it's not github or something else glitching
+                        $rfs->getContents('github.com', 'https://api.github.com/repos/composer/composer', false, ['retry-auth-failure' => false])
+                        // remove packages with very low downloads and that are 404
+                        && $this->downloadManager->getTotalDownloads($package) <= 100
+                    ) {
+                        $name = $package->getName();
+                        $this->packageManager->deletePackage($package);
+
+                        return [
+                            'status' => Job::STATUS_PACKAGE_DELETED,
+                            'message' => 'Update of '.$package->getName().' failed, package appears to be 404/gone and has been deleted',
+                            'details' => '<pre>'.$output.'</pre>',
+                            'exception' => $e,
+                        ];
+                    }
+                } catch (\Throwable $e) {
+                    // ignore failures here, we/github must be offline
+                }
+            }
+        }
+    }
 }