UpdaterWorker.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. <?php declare(strict_types=1);
  2. namespace Packagist\WebBundle\Service;
  3. use Packagist\WebBundle\Service\Scheduler;
  4. use Psr\Log\LoggerInterface;
  5. use Composer\Package\Loader\ArrayLoader;
  6. use Composer\Package\Loader\ValidatingArrayLoader;
  7. use Symfony\Bridge\Doctrine\RegistryInterface;
  8. use Composer\Console\HtmlOutputFormatter;
  9. use Composer\Repository\InvalidRepositoryException;
  10. use Composer\Repository\VcsRepository;
  11. use Composer\IO\ConsoleIO;
  12. use Composer\IO\BufferIO;
  13. use Symfony\Component\Console\Input\StringInput;
  14. use Symfony\Component\Console\Output\OutputInterface;
  15. use Symfony\Component\Console\Helper\HelperSet;
  16. use Monolog\Handler\StreamHandler;
  17. use Packagist\WebBundle\Entity\Package;
  18. use Packagist\WebBundle\Package\Updater;
  19. use Packagist\WebBundle\Entity\Job;
  20. use Packagist\WebBundle\Model\PackageManager;
  21. use Packagist\WebBundle\Model\DownloadManager;
  22. use Seld\Signal\SignalHandler;
  23. use Composer\Factory;
  24. use Composer\Downloader\TransportException;
  25. use Composer\Util\RemoteFilesystem;
  /**
   * Job worker that refreshes one package from its VCS repository.
   *
   * For each job it loads the package, takes the per-package update lock, runs
   * the Updater against a VcsRepository built from the package's repository URL
   * and translates the outcome (success, invalid metadata, dead repository,
   * transport failure) into a job result array.
   */
  class UpdaterWorker
  {
      /** @var LoggerInterface */
      private $logger;
      /** @var RegistryInterface */
      private $doctrine;
      /** @var Updater */
      private $updater;
      /** @var Locker */
      private $locker;
      /** @var Scheduler */
      private $scheduler;
      /** @var PackageManager */
      private $packageManager;
      /** @var DownloadManager */
      private $downloadManager;

      public function __construct(
          LoggerInterface $logger,
          RegistryInterface $doctrine,
          Updater $updater,
          Locker $locker,
          Scheduler $scheduler,
          PackageManager $packageManager,
          DownloadManager $downloadManager
      ) {
          $this->logger = $logger;
          $this->doctrine = $doctrine;
          $this->updater = $updater;
          $this->locker = $locker;
          $this->scheduler = $scheduler;
          $this->packageManager = $packageManager;
          $this->downloadManager = $downloadManager;
      }

      /**
       * Runs the update described by the job payload.
       *
       * Payload keys read here: 'id' (required), 'update_equal_refs' and
       * 'delete_before' (only an exact boolean true enables the matching flag).
       *
       * @param Job           $job    job whose payload identifies the package
       * @param SignalHandler $signal not used by this method
       *
       * @return array result with a 'status' key (a Job::STATUS_* value) and, depending
       *               on the outcome, 'message', 'details', 'exception' or 'after'
       *
       * @throws \Throwable any failure that is not recognised as a metadata,
       *                    dead-repository or transport problem is rethrown
       */
      public function process(Job $job, SignalHandler $signal): array
      {
          $payload = $job->getPayload();
          $id = $payload['id'];

          $em = $this->doctrine->getEntityManager();
          /** @var Package|null $package */
          $package = $em->getRepository(Package::class)->findOneById($id);
          if (!$package) {
              $this->logger->info('Package is gone, skipping', ['id' => $id]);

              return ['status' => Job::STATUS_PACKAGE_GONE, 'message' => 'Package was deleted, skipped'];
          }

          if (!$this->locker->lockPackageUpdate($id)) {
              // another update of this package holds the lock, retry shortly
              return ['status' => Job::STATUS_RESCHEDULE, 'after' => new \DateTime('+5 seconds')];
          }

          // Everything after the lock was acquired runs under this try/finally so
          // that the lock is released even when the setup below throws.
          try {
              $this->logger->info('Updating '.$package->getName());

              $config = Factory::createConfig();
              $io = new BufferIO(
                  '',
                  OutputInterface::VERBOSITY_VERY_VERBOSE,
                  new HtmlOutputFormatter(Factory::createAdditionalStyles())
              );
              $io->loadConfiguration($config);

              try {
                  // Combine the requested flags instead of letting the second one
                  // replace the first (assumes the Updater constants are distinct
                  // bits of a bitmask - confirm against Updater).
                  $flags = 0;
                  if (($payload['update_equal_refs'] ?? false) === true) {
                      $flags |= Updater::UPDATE_EQUAL_REFS;
                  }
                  if (($payload['delete_before'] ?? false) === true) {
                      $flags |= Updater::DELETE_BEFORE;
                  }

                  // prepare dependencies
                  $loader = new ValidatingArrayLoader(new ArrayLoader());

                  // prepare repository
                  $repository = new VcsRepository(['url' => $package->getRepository()], $io, $config);
                  $repository->setLoader($loader);

                  // perform the actual update (fetch and re-scan the repository's source)
                  $package = $this->updater->update($io, $config, $package, $repository, $flags);

                  // github update downgraded to a git clone, this should not happen,
                  // so check through API whether the package still exists
                  $match = self::matchGitHubRepository($package->getRepository());
                  if ($match && 0 === strpos($repository->getDriver()->getUrl(), 'git@')) {
                      $result = $this->checkForDeadGitHubPackage($package, $match, $io, $io->getOutput());
                      if ($result) {
                          return $result;
                      }
                  }
              } catch (\Throwable $e) {
                  return $this->handleUpdateFailure($e, $id, $io);
              }

              return [
                  'status' => Job::STATUS_COMPLETED,
                  'message' => 'Update of '.$package->getName().' complete',
                  'details' => '<pre>'.$io->getOutput().'</pre>',
              ];
          } finally {
              $this->locker->unlockPackageUpdate($id);
          }
      }

      /**
       * Converts an exception raised during the update into a job result.
       *
       * @param \Throwable $e  failure raised while updating
       * @param mixed      $id package id taken from the job payload
       * @param BufferIO   $io IO whose buffered output is attached to the result
       *
       * @return array job result for the recognised failure kinds
       *
       * @throws \Throwable the original exception when it is not a recognised failure,
       *                    or when the package cannot be reloaded
       */
      private function handleUpdateFailure(\Throwable $e, $id, $io): array
      {
          $output = $io->getOutput();

          if (!$this->doctrine->getEntityManager()->isOpen()) {
              $this->doctrine->resetManager();
          }

          // reload the package just in case as Updater tends to merge it to a new instance
          $package = $this->doctrine->getEntityManager()->getRepository(Package::class)->findOneById($id);
          if (!$package) {
              // Nothing left to report against; surface the real failure instead of
              // failing on a null package further down.
              $this->logger->error('Failed update of a package that could not be reloaded', ['id' => $id, 'exception' => $e]);

              throw $e;
          }

          // invalid composer data somehow, notify the owner and then mark the job failed
          if ($e instanceof InvalidRepositoryException) {
              $this->packageManager->notifyUpdateFailure($package, $e, $output);

              return [
                  'status' => Job::STATUS_FAILED,
                  'message' => 'Update of '.$package->getName().' failed, invalid composer.json metadata',
                  'details' => '<pre>'.$output.'</pre>',
                  'exception' => $e,
              ];
          }

          // attempt to detect a 404/dead repository
          // TODO check and delete those packages with crawledAt in the far future but updatedAt in the past in a second step/job if the repo is really unreachable
          // probably should check for download count and a few other metrics to avoid false positives and ask humans to check the others
          if (self::isRepositoryGone($e)) {
              // github 404'ed, check through API whether the package still exists and delete if not
              $match = self::matchGitHubRepository($package->getRepository());
              if ($match) {
                  $result = $this->checkForDeadGitHubPackage($package, $match, $io, $output);
                  if ($result) {
                      return $result;
                  }
              }

              // detected a 404 so mark the package as gone and prevent updates for 1y
              $package->setCrawledAt(new \DateTime('+1 year'));
              $this->doctrine->getEntityManager()->flush($package);

              return [
                  'status' => Job::STATUS_PACKAGE_GONE,
                  'message' => 'Update of '.$package->getName().' failed, package appears to be 404/gone and has been marked as crawled for 1year',
                  'details' => '<pre>'.$output.'</pre>',
                  'exception' => $e,
              ];
          }

          if ($e instanceof TransportException) {
              // Catch request timeouts e.g. gitlab.com
              if (false !== strpos($e->getMessage(), 'file could not be downloaded: failed to open stream: HTTP request failed!')) {
                  return [
                      'status' => Job::STATUS_FAILED,
                      'message' => 'Package data of '.$package->getName().' could not be downloaded. Could not reach remote VCS server. Please try again later.',
                      'exception' => $e,
                  ];
              }

              // generic transport exception
              return [
                  'status' => Job::STATUS_FAILED,
                  'message' => 'Package data of '.$package->getName().' could not be downloaded.',
                  'exception' => $e,
              ];
          }

          $this->logger->error('Failed update of '.$package->getName(), ['exception' => $e]);

          // unexpected error so mark the job errored
          throw $e;
      }

      /**
       * Tells whether an update failure looks like a deleted or unreachable repository.
       *
       * Substring checks compare against false explicitly so that a marker found
       * at offset 0 of the message still counts as a match.
       *
       * @param \Throwable $e failure raised while updating
       *
       * @return bool true when the message (and status code, for the Bitbucket API) matches a known pattern
       */
      private static function isRepositoryGone(\Throwable $e): bool
      {
          $message = $e->getMessage();

          if ($e instanceof \RuntimeException) {
              $contains = static function (string $needle) use ($message): bool {
                  return false !== strpos($message, $needle);
              };

              // git clone was attempted and says the repo is not found, that's very conclusive
              if ($contains('remote: Repository not found')) {
                  return true;
              }

              // git clone says we have no right on gitlab/bitbucket for 404s
              if (
                  $contains('Please make sure you have the correct access rights')
                  && ($contains('git@gitlab.com') || $contains('git@bitbucket.org'))
              ) {
                  return true;
              }

              // git clone says account is disabled on github for private repos(?) if cloning via https
              if ($contains('@github.com/') && $contains(' Please ask the owner to check their account')) {
                  return true;
              }
          }

          // bitbucket api root returns a 404
          return $e instanceof TransportException
              && preg_match('{https://api.bitbucket.org/2.0/repositories/[^/]+/.+?\?fields=-project}i', $message)
              && 404 === (int) $e->getStatusCode();
      }

      /**
       * Matches a repository URL against the GitHub URL pattern used by this worker.
       *
       * @param string $repositoryUrl repository URL of the package
       *
       * @return array|null preg_match() groups (index 1 is "owner/name"), or null when the URL does not match
       */
      private static function matchGitHubRepository($repositoryUrl)
      {
          if (preg_match('{[@/]github.com[:/]([^/]+/[^/]+?)(\.git)?$}i', $repositoryUrl, $match)) {
              return $match;
          }

          return null;
      }

      /**
       * Deletes the package when the GitHub API reports its repository as 404.
       *
       * The package is only deleted when the composer/composer repository is still
       * readable through the same API (so the 404 is not a GitHub-wide or local
       * glitch) and the package has at most 100 total downloads.
       *
       * @param Package  $package package being updated
       * @param array    $match   groups from the GitHub URL match, index 1 being "owner/name"
       * @param BufferIO $io      IO handed to the RemoteFilesystem
       * @param string   $output  update output attached to the result details
       *
       * @return array|null job result when the package was deleted, null otherwise
       */
      private function checkForDeadGitHubPackage(Package $package, $match, $io, $output)
      {
          $rfs = new RemoteFilesystem($io);

          try {
              $rfs->getContents('github.com', 'https://api.github.com/repos/'.$match[1], false, ['retry-auth-failure' => false]);

              // the repository is still visible through the API
              return null;
          } catch (\Throwable $lookupError) {
              if (!$lookupError instanceof TransportException || $lookupError->getStatusCode() !== 404) {
                  return null;
              }
          }

          try {
              if (
                  // check composer repo is visible to make sure it's not github or something else glitching
                  $rfs->getContents('github.com', 'https://api.github.com/repos/composer/composer', false, ['retry-auth-failure' => false])
                  // remove packages with very low downloads and that are 404
                  && $this->downloadManager->getTotalDownloads($package) <= 100
              ) {
                  // read the name before the package is deleted
                  $name = $package->getName();
                  $this->packageManager->deletePackage($package);

                  return [
                      'status' => Job::STATUS_PACKAGE_DELETED,
                      'message' => 'Update of '.$name.' failed, package appears to be 404/gone and has been deleted',
                      'details' => '<pre>'.$output.'</pre>',
                      'exception' => $lookupError,
                  ];
              }
          } catch (\Throwable $ignored) {
              // ignore failures here, we/github must be offline
          }

          return null;
      }
  }