Updater.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. <?php
  2. /*
  3. * This file is part of Packagist.
  4. *
  5. * (c) Jordi Boggiano <j.boggiano@seld.be>
  6. * Nils Adermann <naderman@naderman.de>
  7. *
  8. * For the full copyright and license information, please view the LICENSE
  9. * file that was distributed with this source code.
  10. */
  11. namespace Packagist\WebBundle\Package;
  12. use Composer\Package\AliasPackage;
  13. use Composer\Package\PackageInterface;
  14. use Composer\Repository\RepositoryInterface;
  15. use Composer\Repository\InvalidRepositoryException;
  16. use Composer\Util\ErrorHandler;
  17. use Composer\Util\RemoteFilesystem;
  18. use Composer\Json\JsonFile;
  19. use Composer\Config;
  20. use Composer\IO\IOInterface;
  21. use Packagist\WebBundle\Entity\Author;
  22. use Packagist\WebBundle\Entity\Package;
  23. use Packagist\WebBundle\Entity\Tag;
  24. use Packagist\WebBundle\Entity\Version;
  25. use Packagist\WebBundle\Entity\SuggestLink;
  26. use Symfony\Bridge\Doctrine\RegistryInterface;
  /**
   * Synchronises a Package entity with the versions exposed by a Composer
   * repository and, for GitHub-hosted packages, with metadata and the README
   * fetched from the GitHub API.
   *
   * @author Jordi Boggiano <j.boggiano@seld.be>
   */
  class Updater
  {
      /**
       * Flag: refresh a stored version even when its source reference is unchanged.
       */
      const UPDATE_EQUAL_REFS = 1;

      /**
       * Flag: remove every stored version of the package before updating.
       */
      const DELETE_BEFORE = 2;

      /**
       * Doctrine
       * @var RegistryInterface
       */
      protected $doctrine;

      /**
       * Supported link types
       *
       * Maps a link type to the PackageInterface getter that returns the links
       * ('method') and to the entity class name that stores them ('entity').
       *
       * @var array
       */
      protected $supportedLinkTypes = [
          'require' => [
              'method' => 'getRequires',
              'entity' => 'RequireLink',
          ],
          'conflict' => [
              'method' => 'getConflicts',
              'entity' => 'ConflictLink',
          ],
          'provide' => [
              'method' => 'getProvides',
              'entity' => 'ProvideLink',
          ],
          'replace' => [
              'method' => 'getReplaces',
              'entity' => 'ReplaceLink',
          ],
          'devRequire' => [
              'method' => 'getDevRequires',
              'entity' => 'DevRequireLink',
          ],
      ];

      /**
       * Constructor
       *
       * Stores the Doctrine registry and registers Composer's ErrorHandler.
       *
       * @param RegistryInterface $doctrine
       */
      public function __construct(RegistryInterface $doctrine)
      {
          $this->doctrine = $doctrine;

          ErrorHandler::register();
      }

      /**
       * Update a project
       *
       * Imports every non-alias, non-blacklisted version of the repository into
       * the package, prunes stored versions that were not touched by this run,
       * refreshes the GitHub information when the package lives on GitHub and
       * finally stamps the package as updated and crawled.
       *
       * @param IOInterface $io used to build the RemoteFilesystem for GitHub API calls
       * @param Config $config used to build the RemoteFilesystem for GitHub API calls
       * @param \Packagist\WebBundle\Entity\Package $package
       * @param RepositoryInterface $repository the repository instance used to update from
       * @param int $flags a few of the constants of this class
       * @param \DateTime $start versions last updated more than one minute before this date are pruned, defaults to now
       *
       * @throws InvalidRepositoryException when the repository reports that some branches were invalid
       */
      public function update(IOInterface $io, Config $config, Package $package, RepositoryInterface $repository, $flags = 0, \DateTime $start = null)
      {
          $rfs = new RemoteFilesystem($io, $config);

          // dots are escaped so only the literal 2.0.4/2.0.5/2.0.6 prefixes are blacklisted
          $blacklist = '{^symfony/symfony (2\.0\.[456]|dev-charset|dev-console)}i';

          if (null === $start) {
              $start = new \DateTime();
          }
          $pruneDate = clone $start;
          $pruneDate->modify('-1min');

          $versions = $repository->getPackages();
          $em = $this->doctrine->getManager();

          usort($versions, function ($a, $b) {
              $aVersion = $a->getVersion();
              $bVersion = $b->getVersion();
              if ($aVersion === '9999999-dev' || 'dev-' === substr($aVersion, 0, 4)) {
                  $aVersion = 'dev';
              }
              if ($bVersion === '9999999-dev' || 'dev-' === substr($bVersion, 0, 4)) {
                  $bVersion = 'dev';
              }
              $aIsDev = $aVersion === 'dev' || substr($aVersion, -4) === '-dev';
              $bIsDev = $bVersion === 'dev' || substr($bVersion, -4) === '-dev';

              // push dev versions to the end
              if ($aIsDev !== $bIsDev) {
                  return $aIsDev ? 1 : -1;
              }

              // equal versions are sorted by date
              if ($aVersion === $bVersion) {
                  $aDate = $a->getReleaseDate();
                  $bDate = $b->getReleaseDate();
                  // identical dates must compare as equal to keep the comparator consistent
                  if ($aDate == $bDate) {
                      return 0;
                  }

                  return $aDate > $bDate ? 1 : -1;
              }

              // the rest is sorted by version
              return version_compare($aVersion, $bVersion);
          });

          $versionRepository = $this->doctrine->getRepository('PackagistWebBundle:Version');

          if ($flags & self::DELETE_BEFORE) {
              foreach ($package->getVersions() as $version) {
                  $versionRepository->remove($version);
              }

              $em->flush();
              $em->refresh($package);
          }

          $lastUpdated = true;
          foreach ($versions as $version) {
              if ($version instanceof AliasPackage) {
                  continue;
              }

              if (preg_match($blacklist, $version->getName().' '.$version->getPrettyVersion())) {
                  continue;
              }

              $lastUpdated = $this->updateInformation($package, $version, $flags);
              if ($lastUpdated) {
                  $em->flush();
              }
          }

          // the last version was only marked as still present, persist that timestamp too
          if (!$lastUpdated) {
              $em->flush();
          }

          // remove outdated versions
          foreach ($package->getVersions() as $version) {
              if ($version->getUpdatedAt() < $pruneDate) {
                  $versionRepository->remove($version);
              }
          }

          if (preg_match('{^(?:git://|git@|https?://)github.com[:/]([^/]+)/(.+?)(?:\.git|/)?$}i', $package->getRepository(), $match)) {
              $this->updateGitHubInfo($rfs, $package, $match[1], $match[2]);
          }

          $package->setUpdatedAt(new \DateTime);
          $package->setCrawledAt(new \DateTime);
          $em->flush();

          // hadInvalidBranches() is not declared by RepositoryInterface, so only call it when available
          if (method_exists($repository, 'hadInvalidBranches') && $repository->hadInvalidBranches()) {
              throw new InvalidRepositoryException('Some branches contained invalid data and were discarded, it is advised to review the log and fix any issues present in branches');
          }
      }

      /**
       * Creates or refreshes the Version entity matching one Composer package version.
       *
       * @param Package $package the package entity owning the version
       * @param PackageInterface $data the Composer package describing the version
       * @param int $flags a few of the constants of this class
       *
       * @return bool false when the stored version was left as is (only its
       *              updatedAt timestamp is refreshed), true when it was written
       */
      private function updateInformation(Package $package, PackageInterface $data, $flags)
      {
          $em = $this->doctrine->getManager();
          $version = new Version();

          $normVersion = $data->getVersion();

          $existingVersion = $package->getVersion($normVersion);
          if ($existingVersion) {
              $existingSource = $existingVersion->getSource();
              // a stored version may have no source at all, treat that as a null reference
              $existingReference = isset($existingSource['reference']) ? $existingSource['reference'] : null;

              // update if the right flag is set, or the source reference has changed (re-tag or new commit on branch)
              if ($existingReference !== $data->getSourceReference() || ($flags & self::UPDATE_EQUAL_REFS)) {
                  $version = $existingVersion;
              } else {
                  // mark it updated to avoid it being pruned
                  $existingVersion->setUpdatedAt(new \DateTime);

                  return false;
              }
          }

          $version->setName($package->getName());
          $version->setVersion($data->getPrettyVersion());
          $version->setNormalizedVersion($normVersion);
          $version->setDevelopment($data->isDev());

          $em->persist($version);

          $version->setDescription($data->getDescription());
          $package->setDescription($data->getDescription());
          $version->setHomepage($data->getHomepage());
          $version->setLicense($data->getLicense() ?: []);

          $version->setPackage($package);
          $version->setUpdatedAt(new \DateTime);
          $version->setReleasedAt($data->getReleaseDate());

          // source and dist are rebuilt from scratch so no key of a previous value survives
          if ($data->getSourceType()) {
              $version->setSource([
                  'type' => $data->getSourceType(),
                  'url' => $data->getSourceUrl(),
                  'reference' => $data->getSourceReference(),
              ]);
          } else {
              $version->setSource(null);
          }

          if ($data->getDistType()) {
              $version->setDist([
                  'type' => $data->getDistType(),
                  'url' => $data->getDistUrl(),
                  'reference' => $data->getDistReference(),
                  'shasum' => $data->getDistSha1Checksum(),
              ]);
          } else {
              $version->setDist(null);
          }

          $type = $data->getType();
          if ($type) {
              $version->setType($type);
              if ($type !== $package->getType()) {
                  $package->setType($type);
              }
          }

          $version->setTargetDir($data->getTargetDir());
          $version->setAutoload($data->getAutoload());
          $version->setExtra($data->getExtra());
          $version->setBinaries($data->getBinaries());
          $version->setIncludePaths($data->getIncludePaths());
          $version->setSupport($data->getSupport());

          $this->updateKeywords($em, $version, $data);
          $this->updateAuthors($em, $version, $data);
          $this->updateLinks($em, $version, $data);
          $this->updateSuggests($em, $version, $data);

          if (!$package->getVersions()->contains($version)) {
              $package->addVersions($version);
          }

          return true;
      }

      /**
       * Replaces the tags of a version with the keywords of the Composer package.
       *
       * @param object $em the Doctrine manager
       * @param Version $version
       * @param PackageInterface $data
       */
      private function updateKeywords($em, Version $version, PackageInterface $data)
      {
          $version->getTags()->clear();
          if (!$data->getKeywords()) {
              return;
          }

          // index by lowercased keyword so that case variants collapse into one tag
          $keywords = [];
          foreach ($data->getKeywords() as $keyword) {
              $keywords[mb_strtolower($keyword, 'UTF-8')] = $keyword;
          }

          foreach ($keywords as $keyword) {
              $tag = Tag::getByName($em, $keyword, true);
              if (!$version->getTags()->contains($tag)) {
                  $version->addTag($tag);
              }
          }
      }

      /**
       * Replaces the authors of a version with those of the Composer package,
       * reusing Author entities whose four fields match exactly.
       *
       * @param object $em the Doctrine manager
       * @param Version $version
       * @param PackageInterface $data
       */
      private function updateAuthors($em, Version $version, PackageInterface $data)
      {
          $authorRepository = $this->doctrine->getRepository('PackagistWebBundle:Author');

          $version->getAuthors()->clear();
          if (!$data->getAuthors()) {
              return;
          }

          $fields = ['email', 'name', 'homepage', 'role'];

          foreach ($data->getAuthors() as $authorData) {
              // normalise: trim every field, missing or blank fields become null
              foreach ($fields as $field) {
                  if (isset($authorData[$field])) {
                      $authorData[$field] = trim($authorData[$field]);
                      if ('' === $authorData[$field]) {
                          $authorData[$field] = null;
                      }
                  } else {
                      $authorData[$field] = null;
                  }
              }

              // skip authors with no information
              if (!isset($authorData['email']) && !isset($authorData['name'])) {
                  continue;
              }

              $author = $authorRepository->findOneBy([
                  'email' => $authorData['email'],
                  'name' => $authorData['name'],
                  'homepage' => $authorData['homepage'],
                  'role' => $authorData['role'],
              ]);

              if (!$author) {
                  $author = new Author();
                  $em->persist($author);
              }

              foreach ($fields as $field) {
                  if (isset($authorData[$field])) {
                      $author->{'set'.$field}($authorData[$field]);
                  }
              }

              $author->setUpdatedAt(new \DateTime);
              if (!$version->getAuthors()->contains($author)) {
                  $version->addAuthor($author);
              }
              if (!$author->getVersions()->contains($version)) {
                  $author->addVersion($version);
              }
          }
      }

      /**
       * Synchronises the link entities of a version, for every supported link
       * type, with the links of the Composer package.
       *
       * @param object $em the Doctrine manager
       * @param Version $version
       * @param PackageInterface $data
       */
      private function updateLinks($em, Version $version, PackageInterface $data)
      {
          foreach ($this->supportedLinkTypes as $linkType => $opts) {
              $links = [];
              foreach ($data->{$opts['method']}() as $link) {
                  $constraint = $link->getPrettyConstraint();
                  // in multi-part constraints, rewrite "@flag" suffixes of range parts:
                  // "@stable" is dropped, any other flag becomes a "-flag" suffix
                  if (false !== strpos($constraint, ',') && false !== strpos($constraint, '@')) {
                      $constraint = preg_replace_callback('{([><]=?\s*[^@]+?)@([a-z]+)}i', function ($matches) {
                          if ($matches[2] === 'stable') {
                              return $matches[1];
                          }

                          return $matches[1].'-'.$matches[2];
                      }, $constraint);
                  }

                  $links[$link->getTarget()] = $constraint;
              }

              foreach ($version->{'get'.$linkType}() as $link) {
                  // clear links that have changed/disappeared (for updates)
                  if (!isset($links[$link->getPackageName()]) || $links[$link->getPackageName()] !== $link->getPackageVersion()) {
                      $version->{'get'.$linkType}()->removeElement($link);
                      $em->remove($link);
                  } else {
                      // clear those that are already set
                      unset($links[$link->getPackageName()]);
                  }
              }

              // whatever is left is new and needs an entity
              foreach ($links as $linkPackageName => $linkPackageVersion) {
                  $class = 'Packagist\WebBundle\Entity\\'.$opts['entity'];
                  $link = new $class;
                  $link->setPackageName($linkPackageName);
                  $link->setPackageVersion($linkPackageVersion);
                  $version->{'add'.$linkType.'Link'}($link);
                  $link->setVersion($version);
                  $em->persist($link);
              }
          }
      }

      /**
       * Synchronises the suggest links of a version with the suggestions of the
       * Composer package, removing all of them when the package suggests nothing.
       *
       * @param object $em the Doctrine manager
       * @param Version $version
       * @param PackageInterface $data
       */
      private function updateSuggests($em, Version $version, PackageInterface $data)
      {
          $suggests = $data->getSuggests();

          if (!$suggests) {
              // clear existing suggests if present
              if (count($version->getSuggest())) {
                  foreach ($version->getSuggest() as $link) {
                      $em->remove($link);
                  }
                  $version->getSuggest()->clear();
              }

              return;
          }

          foreach ($version->getSuggest() as $link) {
              // clear links that have changed/disappeared (for updates)
              if (!isset($suggests[$link->getPackageName()]) || $suggests[$link->getPackageName()] !== $link->getPackageVersion()) {
                  $version->getSuggest()->removeElement($link);
                  $em->remove($link);
              } else {
                  // clear those that are already set
                  unset($suggests[$link->getPackageName()]);
              }
          }

          foreach ($suggests as $linkPackageName => $linkPackageVersion) {
              $link = new SuggestLink;
              $link->setPackageName($linkPackageName);
              $link->setPackageVersion($linkPackageVersion);
              $version->addSuggestLink($link);
              $link->setVersion($version);
              $em->persist($link);
          }
      }

      /**
       * Refreshes the README, language and GitHub counters of a package from the GitHub API.
       *
       * Best effort: the package is left untouched when the repository data can
       * not be fetched or parsed, or when fetching the README fails with anything
       * but a 404.
       *
       * @param RemoteFilesystem $rfs
       * @param Package $package
       * @param string $owner GitHub account owning the repository
       * @param string $repo GitHub repository name
       */
      private function updateGitHubInfo(RemoteFilesystem $rfs, Package $package, $owner, $repo)
      {
          $baseApiUrl = 'https://api.github.com/repos/'.$owner.'/'.$repo;

          try {
              $repoData = JsonFile::parseJson($rfs->getContents('github.com', $baseApiUrl, false), $baseApiUrl);
          } catch (\Exception $e) {
              return;
          }

          $readme = null;
          try {
              $opts = ['http' => ['header' => ['Accept: application/vnd.github.v3.html']]];
              $readme = $rfs->getContents('github.com', $baseApiUrl.'/readme', false, $opts);
          } catch (\Exception $e) {
              if (!$e instanceof \Composer\Downloader\TransportException || $e->getCode() !== 404) {
                  return;
              }
              // 404s just mean no readme present so we proceed with the rest
          }

          if (!empty($readme)) {
              $package->setReadme($this->sanitizeReadme($readme, $owner, $repo));
          }

          if (!empty($repoData['language'])) {
              $package->setLanguage($repoData['language']);
          }

          // isset() rather than !empty() so that a counter dropping to 0 is stored too
          $counters = [
              'stargazers_count' => 'setGitHubStars',
              'subscribers_count' => 'setGitHubWatches',
              'network_count' => 'setGitHubForks',
              'open_issues_count' => 'setGitHubOpenIssues',
          ];
          foreach ($counters as $key => $setter) {
              if (isset($repoData[$key])) {
                  $package->{$setter}($repoData[$key]);
              }
          }
      }

      /**
       * Purifies README HTML and rewrites its links and images for display outside of GitHub.
       *
       * @param string $readme raw README HTML
       * @param string $owner GitHub account owning the repository
       * @param string $repo GitHub repository name
       *
       * @return string the content of the resulting <body> element
       */
      private function sanitizeReadme($readme, $owner, $repo)
      {
          $config = \HTMLPurifier_Config::createDefault();
          $config->set('HTML.Allowed', 'a[href|target|rel|id],strong,b,em,i,strike,pre,code,p,ol,ul,li,br,h1,h2,h3,img[src|title|alt|width|height|style]');
          $config->set('Attr.EnableID', true);
          $config->set('Attr.AllowedFrameTargets', ['_blank']);
          $purifier = new \HTMLPurifier($config);
          $readme = $purifier->purify($readme);

          $dom = new \DOMDocument();
          // keep libxml parser diagnostics internal: a markup warning should not
          // surface as a PHP warning and abort the update
          $previousLibxmlSetting = libxml_use_internal_errors(true);
          $dom->loadHTML('<?xml encoding="UTF-8">' . $readme);
          libxml_clear_errors();
          libxml_use_internal_errors($previousLibxmlSetting);

          // Links can not be trusted, mark them nofollow and convert relative to absolute links
          foreach ($dom->getElementsByTagName('a') as $link) {
              $link->setAttribute('rel', 'nofollow');

              // named anchors without a href must not be turned into links
              if (!$link->hasAttribute('href')) {
                  continue;
              }

              $href = $link->getAttribute('href');
              if ('#' === substr($href, 0, 1)) {
                  $link->setAttribute('href', '#user-content-'.substr($href, 1));
              } elseif ('mailto:' === strtolower(substr($href, 0, 7))) {
                  // e-mail links are not relative paths, leave them alone
                  continue;
              } elseif (false === strpos($href, '//')) {
                  $link->setAttribute('href', 'https://github.com/'.$owner.'/'.$repo.'/blob/HEAD/'.$href);
              }
          }

          // convert relative to absolute images
          foreach ($dom->getElementsByTagName('img') as $img) {
              $src = $img->getAttribute('src');
              if (false === strpos($src, '//')) {
                  $img->setAttribute('src', 'https://raw.github.com/'.$owner.'/'.$repo.'/HEAD/'.$src);
              }
          }

          // remove first title as it's usually the project name which we don't need
          foreach (['h1', 'h2'] as $tagName) {
              $headings = $dom->getElementsByTagName($tagName);
              if ($headings->length) {
                  $first = $headings->item(0);
                  $first->parentNode->removeChild($first);
                  break;
              }
          }

          // keep only what sits between <body> and </body> of the serialised document
          $html = $dom->saveHTML();
          $html = substr($html, strpos($html, '<body>')+6);
          $html = substr($html, 0, strrpos($html, '</body>'));

          return $html;
      }
  }