Updater.php 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. <?php
  2. /*
  3. * This file is part of Packagist.
  4. *
  5. * (c) Jordi Boggiano <j.boggiano@seld.be>
  6. * Nils Adermann <naderman@naderman.de>
  7. *
  8. * For the full copyright and license information, please view the LICENSE
  9. * file that was distributed with this source code.
  10. */
  11. namespace Packagist\WebBundle\Package;
  12. use Composer\Package\AliasPackage;
  13. use Composer\Package\PackageInterface;
  14. use Composer\Repository\RepositoryInterface;
  15. use Composer\Repository\VcsRepository;
  16. use Composer\Repository\InvalidRepositoryException;
  17. use Composer\Util\ErrorHandler;
  18. use Composer\Util\RemoteFilesystem;
  19. use Composer\Json\JsonFile;
  20. use Composer\Config;
  21. use Composer\IO\IOInterface;
  22. use Packagist\WebBundle\Entity\Author;
  23. use Packagist\WebBundle\Entity\Package;
  24. use Packagist\WebBundle\Entity\Tag;
  25. use Packagist\WebBundle\Entity\Version;
  26. use Packagist\WebBundle\Entity\SuggestLink;
  27. use Symfony\Bridge\Doctrine\RegistryInterface;
  /**
   * Imports the versions exposed by a Composer repository into the Doctrine
   * entities of a package and refreshes GitHub metadata for GitHub-hosted packages.
   *
   * @author Jordi Boggiano <j.boggiano@seld.be>
   */
  class Updater
  {
      /**
       * Flag: re-import a version even when its stored source reference is unchanged.
       */
      const UPDATE_EQUAL_REFS = 1;

      /**
       * Flag: remove every stored version of the package before importing.
       */
      const DELETE_BEFORE = 2;

      /**
       * Doctrine
       * @var RegistryInterface
       */
      protected $doctrine;

      /**
       * Supported link types
       *
       * Maps each link type to the PackageInterface getter returning its links
       * ("method") and to the short name of the entity class storing them ("entity").
       *
       * @var array
       */
      protected $supportedLinkTypes = array(
          'require' => array(
              'method' => 'getRequires',
              'entity' => 'RequireLink',
          ),
          'conflict' => array(
              'method' => 'getConflicts',
              'entity' => 'ConflictLink',
          ),
          'provide' => array(
              'method' => 'getProvides',
              'entity' => 'ProvideLink',
          ),
          'replace' => array(
              'method' => 'getReplaces',
              'entity' => 'ReplaceLink',
          ),
          'devRequire' => array(
              'method' => 'getDevRequires',
              'entity' => 'DevRequireLink',
          ),
      );

      /**
       * Constructor
       *
       * Also registers Composer's ErrorHandler.
       *
       * @param RegistryInterface $doctrine
       */
      public function __construct(RegistryInterface $doctrine)
      {
          $this->doctrine = $doctrine;

          ErrorHandler::register();
      }

      /**
       * Update a project
       *
       * Imports every non-alias, non-blacklisted version of the repository, prunes
       * stored versions that were not touched during this run, refreshes GitHub
       * metadata when the package URL points at GitHub and stamps the package as
       * updated and crawled.
       *
       * @param IOInterface                         $io         receives the GitHub credentials when a usable maintainer token is found
       * @param Config                              $config     handed to the RemoteFilesystem used for the GitHub API calls
       * @param \Packagist\WebBundle\Entity\Package $package
       * @param RepositoryInterface                 $repository the repository instance used to update from
       * @param int                                 $flags      a few of the constants of this class
       * @param \DateTime                           $start      reference time of the run, defaults to now
       *
       * @throws InvalidRepositoryException when the repository reports invalid branches (thrown after the update was flushed)
       */
      public function update(IOInterface $io, Config $config, Package $package, RepositoryInterface $repository, $flags = 0, \DateTime $start = null)
      {
          $rfs = new RemoteFilesystem($io, $config);
          $blacklist = '{^symfony/symfony (2.0.[456]|dev-charset|dev-console)}i';

          if (null === $start) {
              $start = new \DateTime();
          }
          // versions whose updatedAt is older than this were not seen during this run
          $pruneDate = clone $start;
          $pruneDate->modify('-1min');

          $em = $this->doctrine->getManager();

          if ($repository instanceof VcsRepository) {
              $this->useMaintainerGithubToken($io, $package, $repository);
          }

          $versions = $repository->getPackages();
          usort($versions, array($this, 'compareVersions'));

          $versionRepository = $this->doctrine->getRepository('PackagistWebBundle:Version');

          if ($flags & self::DELETE_BEFORE) {
              foreach ($package->getVersions() as $version) {
                  $versionRepository->remove($version);
              }

              $em->flush();
              $em->refresh($package);
          }

          $lastUpdated = true;
          foreach ($versions as $version) {
              if ($version instanceof AliasPackage) {
                  continue;
              }

              if (preg_match($blacklist, $version->getName().' '.$version->getPrettyVersion())) {
                  continue;
              }

              $lastUpdated = $this->updateInformation($package, $version, $flags);
              if ($lastUpdated) {
                  $em->flush();
              }
          }

          // the last processed version was only touched (updatedAt), persist that too
          if (!$lastUpdated) {
              $em->flush();
          }

          // remove outdated versions
          foreach ($package->getVersions() as $version) {
              if ($version->getUpdatedAt() < $pruneDate) {
                  $versionRepository->remove($version);
              }
          }

          if (preg_match('{^(?:git://|git@|https?://)github.com[:/]([^/]+)/(.+?)(?:\.git|/)?$}i', $package->getRepository(), $match)) {
              $this->updateGitHubInfo($rfs, $package, $match[1], $match[2]);
          }

          $package->setUpdatedAt(new \DateTime);
          $package->setCrawledAt(new \DateTime);
          $em->flush();

          if ($repository->hadInvalidBranches()) {
              throw new InvalidRepositoryException('Some branches contained invalid data and were discarded, it is advised to review the log and fix any issues present in branches');
          }
      }

      /**
       * Configures $io with the first maintainer GitHub token that is still usable.
       *
       * Does nothing unless the repository URL mentions github.com. Tokens rejected
       * with a 401/403 are wiped from the maintainer, tokens with fewer than 100
       * core requests left are skipped.
       *
       * @param IOInterface   $io
       * @param Package       $package
       * @param VcsRepository $repository
       */
      private function useMaintainerGithubToken(IOInterface $io, Package $package, VcsRepository $repository)
      {
          $cfg = $repository->getRepoConfig();
          if (!isset($cfg['url']) || !preg_match('{\bgithub\.com\b}', $cfg['url'])) {
              return;
          }

          $em = $this->doctrine->getManager();

          foreach ($package->getMaintainers() as $maintainer) {
              $token = $maintainer->getGithubToken();
              if (!$token) {
                  continue;
              }

              list($rate, $statusLine) = $this->fetchGithubRateLimit($token);

              // invalid/outdated token, wipe it so we don't try it again
              if (!$rate && $this->isRejectedTokenStatus($statusLine)) {
                  $maintainer->setGithubToken(null);
                  $em->flush($maintainer);
                  continue;
              }

              // not enough limit left
              if (is_array($rate) && isset($rate['resources']['core']['remaining']) && $rate['resources']['core']['remaining'] < 100) {
                  continue;
              }

              // usable token, or its state could not be determined: try it
              $io->setAuthentication('github.com', $token, 'x-oauth-basic');
              break;
          }
      }

      /**
       * Queries the GitHub rate limit endpoint with the given token.
       *
       * Runs in its own scope so that $http_response_header can only ever hold
       * the headers of this very request, never those of a previous probe.
       *
       * @param  string $token
       * @return array  two items: the decoded JSON payload (null when the request
       *                failed or the body was not JSON) and the HTTP status line
       *                ('' when no response was received)
       */
      private function fetchGithubRateLimit($token)
      {
          // the token travels in a header rather than in the URL so it does not leak into logs
          $context = stream_context_create(['http' => ['header' => [
              'User-agent: packagist-token-check',
              'Authorization: token '.$token,
          ]]]);

          // best effort: failures are reported through the return value, not as warnings
          $body = @file_get_contents('https://api.github.com/rate_limit', false, $context);
          $rate = false === $body ? null : json_decode($body, true);
          $statusLine = isset($http_response_header[0]) ? $http_response_header[0] : '';

          return array($rate, $statusLine);
      }

      /**
       * Tells whether an HTTP status line carries a 401 or 403 status code.
       *
       * @param  string $statusLine e.g. "HTTP/1.1 401 Unauthorized"
       * @return bool
       */
      private function isRejectedTokenStatus($statusLine)
      {
          return 1 === preg_match('{^HTTP/\S+\s+(?:401|403)\b}', (string) $statusLine);
      }

      /**
       * Sort callback ordering packages for import.
       *
       * Stable versions come first ordered by version_compare(), dev versions are
       * pushed to the end, and packages with the same version are ordered by
       * release date. Returns 0 when both version and release date are equal so
       * the comparison stays consistent in both directions.
       *
       * @param  object $a package exposing getVersion() and getReleaseDate()
       * @param  object $b package exposing getVersion() and getReleaseDate()
       * @return int    negative, zero or positive as expected by usort()
       */
      private function compareVersions($a, $b)
      {
          $aVersion = $a->getVersion();
          $bVersion = $b->getVersion();
          if ($aVersion === '9999999-dev' || 'dev-' === substr($aVersion, 0, 4)) {
              $aVersion = 'dev';
          }
          if ($bVersion === '9999999-dev' || 'dev-' === substr($bVersion, 0, 4)) {
              $bVersion = 'dev';
          }
          $aIsDev = $aVersion === 'dev' || substr($aVersion, -4) === '-dev';
          $bIsDev = $bVersion === 'dev' || substr($bVersion, -4) === '-dev';

          // push dev versions to the end
          if ($aIsDev !== $bIsDev) {
              return $aIsDev ? 1 : -1;
          }

          // equal versions are sorted by date
          if ($aVersion === $bVersion) {
              $aDate = $a->getReleaseDate();
              $bDate = $b->getReleaseDate();
              if ($aDate == $bDate) {
                  return 0;
              }

              return $aDate > $bDate ? 1 : -1;
          }

          // the rest is sorted by version
          return version_compare($aVersion, $bVersion);
      }

      /**
       * Creates or refreshes the Version entity matching one repository package.
       *
       * @param  Package          $package
       * @param  PackageInterface $data  the version as read from the repository
       * @param  int              $flags a few of the constants of this class
       * @return bool             false when the stored version was left as is (only its
       *                          updatedAt was bumped), true when it was created or rewritten
       */
      private function updateInformation(Package $package, PackageInterface $data, $flags)
      {
          $em = $this->doctrine->getManager();
          $version = new Version();

          $normVersion = $data->getVersion();

          $existingVersion = $package->getVersion($normVersion);
          if ($existingVersion) {
              // the stored source may be null, so the reference is read defensively
              $existingSource = $existingVersion->getSource();
              $existingReference = is_array($existingSource) && isset($existingSource['reference']) ? $existingSource['reference'] : null;

              // update if the right flag is set, or the source reference has changed (re-tag or new commit on branch)
              if ($existingReference !== $data->getSourceReference() || ($flags & self::UPDATE_EQUAL_REFS)) {
                  $version = $existingVersion;
              } else {
                  // mark it updated to avoid it being pruned
                  $existingVersion->setUpdatedAt(new \DateTime);

                  return false;
              }
          }

          $version->setName($package->getName());
          $version->setVersion($data->getPrettyVersion());
          $version->setNormalizedVersion($normVersion);
          $version->setDevelopment($data->isDev());

          $em->persist($version);

          $descr = $this->sanitize($data->getDescription());
          $version->setDescription($descr);
          $package->setDescription($descr);
          $version->setHomepage($data->getHomepage());
          $version->setLicense($data->getLicense() ?: array());

          $version->setPackage($package);
          $version->setUpdatedAt(new \DateTime);
          $version->setReleasedAt($data->getReleaseDate());

          if ($data->getSourceType()) {
              $version->setSource(array(
                  'type' => $data->getSourceType(),
                  'url' => $data->getSourceUrl(),
                  'reference' => $data->getSourceReference(),
              ));
          } else {
              $version->setSource(null);
          }

          if ($data->getDistType()) {
              $version->setDist(array(
                  'type' => $data->getDistType(),
                  'url' => $data->getDistUrl(),
                  'reference' => $data->getDistReference(),
                  'shasum' => $data->getDistSha1Checksum(),
              ));
          } else {
              $version->setDist(null);
          }

          if ($data->getType()) {
              $type = $this->sanitize($data->getType());
              $version->setType($type);
              if ($type !== $package->getType()) {
                  $package->setType($type);
              }
          }

          $version->setTargetDir($data->getTargetDir());
          $version->setAutoload($data->getAutoload());
          $version->setExtra($data->getExtra());
          $version->setBinaries($data->getBinaries());
          $version->setIncludePaths($data->getIncludePaths());
          $version->setSupport($data->getSupport());

          $this->updateTags($em, $version, $data);
          $this->updateAuthors($em, $version, $data);
          $this->updateLinks($em, $version, $data);
          $this->updateSuggests($em, $version, $data);

          if (!$package->getVersions()->contains($version)) {
              $package->addVersions($version);
          }

          return true;
      }

      /**
       * Replaces the tags of a version with the keywords of the package data,
       * keeping one keyword per lowercased spelling (the last one wins).
       *
       * @param object           $em      entity manager
       * @param Version          $version
       * @param PackageInterface $data
       */
      private function updateTags($em, Version $version, PackageInterface $data)
      {
          $version->getTags()->clear();
          if (!$data->getKeywords()) {
              return;
          }

          $keywords = array();
          foreach ($data->getKeywords() as $keyword) {
              $keywords[mb_strtolower($keyword, 'UTF-8')] = $keyword;
          }

          foreach ($keywords as $keyword) {
              $tag = Tag::getByName($em, $keyword, true);
              if (!$version->getTags()->contains($tag)) {
                  $version->addTag($tag);
              }
          }
      }

      /**
       * Replaces the authors of a version with those of the package data,
       * reusing Author entities whose four fields match exactly.
       *
       * @param object           $em      entity manager
       * @param Version          $version
       * @param PackageInterface $data
       */
      private function updateAuthors($em, Version $version, PackageInterface $data)
      {
          $authorRepository = $this->doctrine->getRepository('PackagistWebBundle:Author');
          $fields = array('email', 'name', 'homepage', 'role');

          $version->getAuthors()->clear();
          if (!$data->getAuthors()) {
              return;
          }

          foreach ($data->getAuthors() as $authorData) {
              // normalize: trimmed value, or null when missing/blank
              foreach ($fields as $field) {
                  if (isset($authorData[$field])) {
                      $authorData[$field] = trim($authorData[$field]);
                      if ('' === $authorData[$field]) {
                          $authorData[$field] = null;
                      }
                  } else {
                      $authorData[$field] = null;
                  }
              }

              // skip authors with no information
              if (!isset($authorData['email']) && !isset($authorData['name'])) {
                  continue;
              }

              $author = $authorRepository->findOneBy(array(
                  'email' => $authorData['email'],
                  'name' => $authorData['name'],
                  'homepage' => $authorData['homepage'],
                  'role' => $authorData['role'],
              ));

              if (!$author) {
                  $author = new Author();
                  $em->persist($author);
              }

              foreach ($fields as $field) {
                  if (isset($authorData[$field])) {
                      $author->{'set'.$field}($authorData[$field]);
                  }
              }

              $author->setUpdatedAt(new \DateTime);
              if (!$version->getAuthors()->contains($author)) {
                  $version->addAuthor($author);
              }
              if (!$author->getVersions()->contains($version)) {
                  $author->addVersion($version);
              }
          }
      }

      /**
       * Synchronises the link entities of a version (one set per entry of
       * $supportedLinkTypes) with the links of the package data.
       *
       * @param object           $em      entity manager
       * @param Version          $version
       * @param PackageInterface $data
       */
      private function updateLinks($em, Version $version, PackageInterface $data)
      {
          foreach ($this->supportedLinkTypes as $linkType => $opts) {
              $links = array();
              foreach ($data->{$opts['method']}() as $link) {
                  $constraint = $link->getPrettyConstraint();
                  // in multi-constraints, "@stable" is dropped and any other "@flag" becomes a "-flag" suffix
                  if (false !== strpos($constraint, ',') && false !== strpos($constraint, '@')) {
                      $constraint = preg_replace_callback('{([><]=?\s*[^@]+?)@([a-z]+)}i', function ($matches) {
                          if ($matches[2] === 'stable') {
                              return $matches[1];
                          }

                          return $matches[1].'-'.$matches[2];
                      }, $constraint);
                  }

                  $links[$link->getTarget()] = $constraint;
              }

              foreach ($version->{'get'.$linkType}() as $link) {
                  // clear links that have changed/disappeared (for updates)
                  if (!isset($links[$link->getPackageName()]) || $links[$link->getPackageName()] !== $link->getPackageVersion()) {
                      $version->{'get'.$linkType}()->removeElement($link);
                      $em->remove($link);
                  } else {
                      // clear those that are already set
                      unset($links[$link->getPackageName()]);
                  }
              }

              // whatever is left is new and needs an entity
              foreach ($links as $linkPackageName => $linkPackageVersion) {
                  $class = 'Packagist\WebBundle\Entity\\'.$opts['entity'];
                  $link = new $class;
                  $link->setPackageName($linkPackageName);
                  $link->setPackageVersion($linkPackageVersion);
                  $version->{'add'.$linkType.'Link'}($link);
                  $link->setVersion($version);
                  $em->persist($link);
              }
          }
      }

      /**
       * Synchronises the SuggestLink entities of a version with the suggests of
       * the package data.
       *
       * @param object           $em      entity manager
       * @param Version          $version
       * @param PackageInterface $data
       */
      private function updateSuggests($em, Version $version, PackageInterface $data)
      {
          $suggests = $data->getSuggests();

          if (!$suggests) {
              // clear existing suggests if present
              if (count($version->getSuggest())) {
                  foreach ($version->getSuggest() as $link) {
                      $em->remove($link);
                  }
                  $version->getSuggest()->clear();
              }

              return;
          }

          foreach ($version->getSuggest() as $link) {
              // clear links that have changed/disappeared (for updates)
              if (!isset($suggests[$link->getPackageName()]) || $suggests[$link->getPackageName()] !== $link->getPackageVersion()) {
                  $version->getSuggest()->removeElement($link);
                  $em->remove($link);
              } else {
                  // clear those that are already set
                  unset($suggests[$link->getPackageName()]);
              }
          }

          foreach ($suggests as $linkPackageName => $linkPackageVersion) {
              $link = new SuggestLink;
              $link->setPackageName($linkPackageName);
              $link->setPackageVersion($linkPackageVersion);
              $version->addSuggestLink($link);
              $link->setVersion($version);
              $em->persist($link);
          }
      }

      /**
       * Refreshes the readme, language and counters of a package from the GitHub API.
       *
       * Best effort: returns without touching the package when the repository
       * data cannot be fetched, or when fetching the readme fails with anything
       * but a 404.
       *
       * @param RemoteFilesystem $rfs
       * @param Package          $package
       * @param string           $owner   GitHub account owning the repository
       * @param string           $repo    GitHub repository name
       */
      private function updateGitHubInfo(RemoteFilesystem $rfs, Package $package, $owner, $repo)
      {
          $baseApiUrl = 'https://api.github.com/repos/'.$owner.'/'.$repo;

          try {
              $repoData = JsonFile::parseJson($rfs->getContents('github.com', $baseApiUrl, false), $baseApiUrl);
          } catch (\Exception $e) {
              return;
          }

          $readme = null;
          try {
              $opts = ['http' => ['header' => ['Accept: application/vnd.github.v3.html']]];
              $readme = $rfs->getContents('github.com', $baseApiUrl.'/readme', false, $opts);
          } catch (\Exception $e) {
              if (!$e instanceof \Composer\Downloader\TransportException || $e->getCode() !== 404) {
                  return;
              }
              // 404s just mean no readme present so we proceed with the rest
          }

          if (!empty($readme)) {
              $elements = array(
                  'p',
                  'br',
                  'small',
                  'strong', 'b',
                  'em', 'i',
                  'strike',
                  'sub', 'sup',
                  'ins', 'del',
                  'ol', 'ul', 'li',
                  'h1', 'h2', 'h3',
                  'dl', 'dd', 'dt',
                  'pre', 'code', 'samp', 'kbd',
                  'q', 'blockquote', 'abbr', 'cite',
                  'table', 'thead', 'tbody', 'th', 'tr', 'td',
                  'a[href|target|rel|id]',
                  'img[src|title|alt|width|height|style]'
              );
              $config = \HTMLPurifier_Config::createDefault();
              $config->set('HTML.Allowed', implode(',', $elements));
              $config->set('Attr.EnableID', true);
              $config->set('Attr.AllowedFrameTargets', ['_blank']);
              $purifier = new \HTMLPurifier($config);

              $package->setReadme($this->rewriteReadmeHtml($purifier->purify($readme), $owner, $repo));
          }

          if (!empty($repoData['language'])) {
              $package->setLanguage($repoData['language']);
          }
          if (isset($repoData['stargazers_count'])) {
              $package->setGitHubStars($repoData['stargazers_count']);
          }
          if (isset($repoData['subscribers_count'])) {
              $package->setGitHubWatches($repoData['subscribers_count']);
          }
          if (isset($repoData['network_count'])) {
              $package->setGitHubForks($repoData['network_count']);
          }
          if (isset($repoData['open_issues_count'])) {
              $package->setGitHubOpenIssues($repoData['open_issues_count']);
          }
      }

      /**
       * Post-processes purified readme HTML: marks links nofollow, makes relative
       * links and images absolute and drops the first heading.
       *
       * @param  string $html  readme markup
       * @param  string $owner GitHub account owning the repository
       * @param  string $repo  GitHub repository name
       * @return string the markup found inside <body>, '' when the parsed document has no body
       */
      private function rewriteReadmeHtml($html, $owner, $repo)
      {
          $dom = new \DOMDocument();

          // collect parser diagnostics internally so markup quirks do not surface as PHP warnings
          $previousErrorMode = libxml_use_internal_errors(true);
          $dom->loadHTML('<?xml encoding="UTF-8">' . $html);
          libxml_clear_errors();
          libxml_use_internal_errors($previousErrorMode);

          // Links can not be trusted, mark them nofollow and convert relative to absolute links
          foreach ($dom->getElementsByTagName('a') as $link) {
              $link->setAttribute('rel', 'nofollow');

              $href = $link->getAttribute('href');
              if ('' === $href) {
                  // plain named anchor, there is no target to rewrite
                  continue;
              }

              if ('#' === substr($href, 0, 1)) {
                  $link->setAttribute('href', '#user-content-'.substr($href, 1));
              } elseif ('mailto:' === substr($href, 0, 7)) {
                  // e-mail links are not paths inside the repository, keep them as is
              } elseif (false === strpos($href, '//')) {
                  $link->setAttribute('href', 'https://github.com/'.$owner.'/'.$repo.'/blob/HEAD/'.$href);
              }
          }

          // convert relative to absolute images
          foreach ($dom->getElementsByTagName('img') as $img) {
              $src = $img->getAttribute('src');
              if (false === strpos($src, '//')) {
                  $img->setAttribute('src', 'https://raw.github.com/'.$owner.'/'.$repo.'/HEAD/'.$src);
              }
          }

          // remove first title as it's usually the project name which we don't need
          if ($dom->getElementsByTagName('h1')->length) {
              $first = $dom->getElementsByTagName('h1')->item(0);
              $first->parentNode->removeChild($first);
          } elseif ($dom->getElementsByTagName('h2')->length) {
              $first = $dom->getElementsByTagName('h2')->item(0);
              $first->parentNode->removeChild($first);
          }

          // keep only what sits between the first <body> and the last </body>
          $serialized = $dom->saveHTML();
          $bodyStart = strpos($serialized, '<body>');
          $bodyEnd = strrpos($serialized, '</body>');
          if (false === $bodyStart || false === $bodyEnd || $bodyEnd < $bodyStart + 6) {
              return '';
          }

          return substr($serialized, $bodyStart + 6, $bodyEnd - $bodyStart - 6);
      }

      /**
       * Strips ESC sequences (ESC optionally followed by "[" and one character)
       * and the control characters 0x01-0x1A from a string.
       *
       * @param  string|null $str
       * @return string      the cleaned string; '' for null input or when the
       *                     input cannot be processed as UTF-8
       */
      private function sanitize($str)
      {
          // remove escape chars
          $str = (string) preg_replace("{\x1B(?:\[.)?}u", '', (string) $str);

          return (string) preg_replace("{[\x01-\x1A]}u", '', $str);
      }
  }