IndexPackagesCommand.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. <?php
  2. /*
  3. * This file is part of Packagist.
  4. *
  5. * (c) Jordi Boggiano <j.boggiano@seld.be>
  6. * Nils Adermann <naderman@naderman.de>
  7. *
  8. * For the full copyright and license information, please view the LICENSE
  9. * file that was distributed with this source code.
  10. */
  11. namespace Packagist\WebBundle\Command;
  12. use Packagist\WebBundle\Entity\Package;
  13. use Packagist\WebBundle\Model\DownloadManager;
  14. use Packagist\WebBundle\Model\FavoriteManager;
  15. use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
  16. use Symfony\Component\Console\Input\InputArgument;
  17. use Symfony\Component\Console\Input\InputInterface;
  18. use Symfony\Component\Console\Input\InputOption;
  19. use Symfony\Component\Console\Output\OutputInterface;
  20. use Doctrine\DBAL\Connection;
  /**
   * Console command "packagist:index": pushes package records to the Algolia
   * search index and records the indexing time on each package row.
   *
   * Modes, as selected in execute():
   *  - a package name argument: index only that package;
   *  - --force: clear the Algolia index, then index every package;
   *  - --all: index every package without clearing the index first;
   *  - neither: index whatever the Package repository's
   *    getStalePackagesForIndexing() returns.
   */
  class IndexPackagesCommand extends ContainerAwareCommand
  {
      /**
       * {@inheritdoc}
       */
      protected function configure()
      {
          $this
              ->setName('packagist:index')
              ->setDefinition([
                  new InputOption('force', null, InputOption::VALUE_NONE, 'Force a re-indexing of all packages'),
                  new InputOption('all', null, InputOption::VALUE_NONE, 'Index all packages without clearing the index first'),
                  new InputArgument('package', InputArgument::OPTIONAL, 'Package name to index'),
              ])
              ->setDescription('Indexes packages in Algolia')
          ;
      }

      /**
       * {@inheritdoc}
       *
       * Packages are processed in slices of 50 ids. For every slice the
       * records are sent to Algolia in one saveObjects() call and, when that
       * succeeds, the indexedAt column of the handled packages is updated.
       *
       * @return int|null 1 when the requested package name does not exist,
       *                  null otherwise
       */
      protected function execute(InputInterface $input, OutputInterface $output)
      {
          $verbose = $input->getOption('verbose');
          $force = $input->getOption('force');
          $indexAll = $input->getOption('all');
          $packageName = $input->getArgument('package');

          $container = $this->getContainer();
          $indexName = $container->getParameter('algolia.index_name');

          // Do nothing while the deploy lock file is present.
          $deployLock = $container->getParameter('kernel.cache_dir').'/deploy.globallock';
          if (file_exists($deployLock)) {
              if ($verbose) {
                  $output->writeln('Aborting, '.$deployLock.' file present');
              }

              return;
          }

          $locker = $container->get('locker');
          if (!$locker->lockCommand($this->getName())) {
              if ($verbose) {
                  $output->writeln('Aborting, another task is running already');
              }

              return;
          }

          // From here on the command lock is held: release it on every exit
          // path, including early returns and uncaught exceptions.
          try {
              $doctrine = $container->get('doctrine');
              $algolia = $container->get('packagist.algolia.client');
              $index = $algolia->initIndex($indexName);
              $redis = $container->get('snc_redis.default');
              $downloadManager = $container->get('packagist.download_manager');
              $favoriteManager = $container->get('packagist.favorite_manager');
              $packageRepo = $doctrine->getRepository('PackagistWebBundle:Package');

              if ($packageName) {
                  $requested = $packageRepo->findOneByName($packageName);
                  if (!$requested) {
                      // Report an unknown name instead of calling getId() on null.
                      $output->writeln('<error>Package '.$packageName.' not found</error>');

                      return 1;
                  }
                  $rows = [['id' => $requested->getId()]];
              } elseif ($force || $indexAll) {
                  $connection = $doctrine->getManager()->getConnection();
                  $rows = $connection->fetchAll('SELECT id FROM package ORDER BY id ASC');
                  $connection->executeQuery('UPDATE package SET indexedAt = NULL');
              } else {
                  $rows = $packageRepo->getStalePackagesForIndexing();
              }

              $ids = [];
              foreach ($rows as $row) {
                  $ids[] = $row['id'];
              }

              // clear index before a full-update
              if ($force && !$packageName) {
                  if ($verbose) {
                      $output->writeln('Deleting existing index');
                  }
                  $index->clear();
              }

              $total = count($ids);
              $current = 0;
              // Width used to right-align the progress counter in verbose output.
              $counterWidth = strlen((string) $total);

              // update package index
              while ($ids) {
                  $indexTime = new \DateTime;
                  $idsSlice = array_splice($ids, 0, 50);
                  $packages = $packageRepo->findById($idsSlice);

                  $idsToUpdate = [];
                  $records = [];

                  foreach ($packages as $pkg) {
                      $current++;
                      if ($verbose) {
                          $output->writeln('['.sprintf('%'.$counterWidth.'d', $current).'/'.$total.'] Indexing '.$pkg->getName());
                      }

                      // delete spam packages from the search index
                      if ($pkg->isAbandoned() && $pkg->getReplacementPackage() === 'spam/spam') {
                          try {
                              $index->deleteObject($pkg->getName());
                              $idsToUpdate[] = $pkg->getId();
                          } catch (\AlgoliaSearch\AlgoliaException $e) {
                              // Never fall through to indexing a spam package. It is
                              // not added to $idsToUpdate, so its indexedAt is left
                              // untouched by this run.
                              $output->writeln('<error>'.get_class($e).': '.$e->getMessage().', could not remove spam package '.$pkg->getName().' from the index.</error>');
                          }

                          continue;
                      }

                      try {
                          $tags = $this->getTags($doctrine, $pkg);
                          $records[] = $this->packageToSearchableArray($pkg, $tags, $redis, $downloadManager, $favoriteManager);
                          $idsToUpdate[] = $pkg->getId();
                      } catch (\Exception $e) {
                          $output->writeln('<error>Exception: '.$e->getMessage().', skipping package '.$pkg->getName().'.</error>');

                          continue;
                      }

                      // Each name provided by the package gets its own "virtual" record.
                      foreach ($this->getProviders($doctrine, $pkg) as $provided) {
                          $records[] = $this->createSearchableProvider($provided['packageName']);
                      }
                  }

                  $saved = true;
                  try {
                      $index->saveObjects($records);
                  } catch (\Exception $e) {
                      $output->writeln('<error>'.get_class($e).': '.$e->getMessage().', occurred while processing packages: '.implode(',', $idsSlice).'</error>');
                      $saved = false;
                  }

                  // Clear the entity manager after every slice, also when the save failed.
                  $doctrine->getManager()->clear();
                  unset($packages);

                  if (!$saved) {
                      // indexedAt is left untouched for this slice.
                      continue;
                  }

                  if ($verbose) {
                      $output->writeln('Updating package indexedAt column');
                  }

                  $this->updateIndexedAt($idsToUpdate, $doctrine, $indexTime->format('Y-m-d H:i:s'));
              }
          } finally {
              $locker->unlockCommand($this->getName());
          }
      }

      /**
       * Builds the Algolia record for a package.
       *
       * @param Package         $package
       * @param array           $tags            tag names, stored under the "tags" key
       * @param mixed           $redis           client used to read the "downloads:trending" sorted-set score
       * @param DownloadManager $downloadManager
       * @param FavoriteManager $favoriteManager
       *
       * @return array record whose objectID is the package name
       */
      private function packageToSearchableArray(
          Package $package,
          array $tags,
          $redis,
          DownloadManager $downloadManager,
          FavoriteManager $favoriteManager
      ) {
          $faversCount = $favoriteManager->getFaverCount($package);
          $downloads = $downloadManager->getDownloads($package);
          $stars = $package->getGitHubStars();

          $downloadsLog = $downloads['monthly'] > 0 ? log($downloads['monthly'], 10) : 0;
          $starsLog = $stars > 0 ? log($stars, 10) : 0;
          $popularity = round($downloadsLog + $starsLog);

          $trendiness = $redis->zscore('downloads:trending', $package->getId());

          $record = [
              'id' => $package->getId(),
              'objectID' => $package->getName(),
              'name' => $package->getName(),
              'package_organisation' => $package->getVendor(),
              'package_name' => $package->getPackageName(),
              // Strip markup and control characters; the cast guards against a null description.
              'description' => preg_replace('{[\x00-\x1f]+}u', '', strip_tags((string) $package->getDescription())),
              'type' => $package->getType(),
              'repository' => $package->getRepository(),
              'language' => $package->getLanguage(),
              // log10 of the "downloads:trending" score (downloads over the
              // last 7 days, according to the original comment)
              'trendiness' => $trendiness > 0 ? log($trendiness, 10) : 0,
              // log10 of monthly downloads + log10 of GitHub stars, rounded
              'popularity' => $popularity,
              'meta' => [
                  'downloads' => $downloads['total'],
                  'downloads_formatted' => number_format($downloads['total'], 0, ',', ' '),
                  'favers' => $faversCount,
                  'favers_formatted' => number_format($faversCount, 0, ',', ' '),
              ],
          ];

          if ($package->isAbandoned()) {
              $record['abandoned'] = 1;
              $record['replacementPackage'] = $package->getReplacementPackage() ?: '';
          } else {
              $record['abandoned'] = 0;
              $record['replacementPackage'] = '';
          }

          $record['tags'] = $tags;

          return $record;
      }

      /**
       * Builds the Algolia record for a provided ("virtual") package name.
       *
       * @param string $provided provided package name, e.g. "vendor/name"
       *
       * @return array record with objectID "virtual:<name>" and fixed
       *               trendiness/popularity values
       */
      private function createSearchableProvider(string $provided)
      {
          return [
              'id' => $provided,
              'objectID' => 'virtual:'.$provided,
              'name' => $provided,
              // part before the first slash
              'package_organisation' => preg_replace('{/.*$}', '', $provided),
              // part after the first slash
              'package_name' => preg_replace('{^[^/]*/}', '', $provided),
              'description' => '',
              'type' => 'virtual-package',
              'repository' => '',
              'language' => '',
              'trendiness' => 100,
              'popularity' => 4,
              'abandoned' => 0,
              'replacementPackage' => '',
              'tags' => [],
          ];
      }

      /**
       * Fetches the distinct names provided by the package's versions that
       * have development = true.
       *
       * @param mixed   $doctrine service exposing getManager()->getConnection()
       * @param Package $package
       *
       * @return array rows, each with a "packageName" key
       */
      private function getProviders($doctrine, Package $package)
      {
          return $doctrine->getManager()->getConnection()->fetchAll(
              'SELECT lp.packageName
                  FROM package p
                  JOIN package_version pv ON p.id = pv.package_id
                  JOIN link_provide lp ON lp.version_id = pv.id
                  WHERE p.id = :id
                  AND pv.development = true
                  GROUP BY lp.packageName',
              ['id' => $package->getId()]
          );
      }

      /**
       * Fetches the distinct tag names attached to any version of the package.
       *
       * @param mixed   $doctrine service exposing getManager()->getConnection()
       * @param Package $package
       *
       * @return array tag names, lower-cased and stripped of control characters
       */
      private function getTags($doctrine, Package $package)
      {
          $rows = $doctrine->getManager()->getConnection()->fetchAll(
              'SELECT t.name FROM package p
                  JOIN package_version pv ON p.id = pv.package_id
                  JOIN version_tag vt ON vt.version_id = pv.id
                  JOIN tag t ON t.id = vt.tag_id
                  WHERE p.id = :id
                  GROUP BY t.id, t.name',
              ['id' => $package->getId()]
          );

          return array_map(function ($row) {
              return mb_strtolower(preg_replace('{[\x00-\x1f]+}u', '', $row['name']), 'UTF-8');
          }, $rows);
      }

      /**
       * Sets indexedAt to $time for the given package ids.
       *
       * The query is attempted up to five times with a two second pause
       * between attempts; the exception from the final attempt is rethrown.
       *
       * @param array  $idsToUpdate package ids (bound as an integer list)
       * @param mixed  $doctrine    service exposing getManager()->getConnection()
       * @param string $time        value written to the indexedAt column
       *
       * @throws \Exception when the last attempt fails
       */
      private function updateIndexedAt(array $idsToUpdate, $doctrine, string $time)
      {
          if (!$idsToUpdate) {
              // Nothing was indexed in this slice, so there is no row to update.
              return;
          }

          $retries = 5;
          // retry loop in case of a lock timeout
          while ($retries--) {
              try {
                  $doctrine->getManager()->getConnection()->executeQuery(
                      'UPDATE package SET indexedAt=:indexed WHERE id IN (:ids)',
                      [
                          'ids' => $idsToUpdate,
                          'indexed' => $time,
                      ],
                      ['ids' => Connection::PARAM_INT_ARRAY]
                  );

                  // Success: stop here rather than re-running the UPDATE for
                  // every remaining retry.
                  return;
              } catch (\Exception $e) {
                  if (!$retries) {
                      throw $e;
                  }
                  sleep(2);
              }
          }
      }
  }