Updater.php 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705
  1. <?php
  2. /*
  3. * This file is part of Packagist.
  4. *
  5. * (c) Jordi Boggiano <j.boggiano@seld.be>
  6. * Nils Adermann <naderman@naderman.de>
  7. *
  8. * For the full copyright and license information, please view the LICENSE
  9. * file that was distributed with this source code.
  10. */
  11. namespace Packagist\WebBundle\Package;
  12. use cebe\markdown\GithubMarkdown;
  13. use Composer\Package\AliasPackage;
  14. use Composer\Package\PackageInterface;
  15. use Composer\Repository\RepositoryInterface;
  16. use Composer\Repository\VcsRepository;
  17. use Composer\Repository\Vcs\GitHubDriver;
  18. use Composer\Repository\InvalidRepositoryException;
  19. use Composer\Util\ErrorHandler;
  20. use Composer\Util\RemoteFilesystem;
  21. use Composer\Config;
  22. use Composer\IO\IOInterface;
  23. use Packagist\WebBundle\Entity\Author;
  24. use Packagist\WebBundle\Entity\Package;
  25. use Packagist\WebBundle\Entity\Tag;
  26. use Packagist\WebBundle\Entity\Version;
  27. use Packagist\WebBundle\Entity\VersionRepository;
  28. use Packagist\WebBundle\Entity\SuggestLink;
  29. use Symfony\Bridge\Doctrine\RegistryInterface;
  30. use Doctrine\DBAL\Connection;
  31. /**
  32. * @author Jordi Boggiano <j.boggiano@seld.be>
  33. */
  34. class Updater
  35. {
  /** Flag for update(): refresh a version even when its stored source reference is unchanged. */
  const UPDATE_EQUAL_REFS = 1;

  /** Flag for update(): remove every stored version of the package before updating. */
  const DELETE_BEFORE = 2;

  /**
   * Doctrine registry used to obtain the entity manager and the repositories.
   *
   * @var RegistryInterface
   */
  protected $doctrine;

  /**
   * Link types synchronised for every version.
   *
   * Each key names a link type; 'method' is the getter called on the composer
   * package to read the links and 'entity' is the short name of the link entity
   * class (inside Packagist\WebBundle\Entity) instantiated for new links.
   *
   * @var array
   */
  protected $supportedLinkTypes = [
      'require' => [
          'method' => 'getRequires',
          'entity' => 'RequireLink',
      ],
      'conflict' => [
          'method' => 'getConflicts',
          'entity' => 'ConflictLink',
      ],
      'provide' => [
          'method' => 'getProvides',
          'entity' => 'ProvideLink',
      ],
      'replace' => [
          'method' => 'getReplaces',
          'entity' => 'ReplaceLink',
      ],
      'devRequire' => [
          'method' => 'getDevRequires',
          'entity' => 'DevRequireLink',
      ],
  ];
  /**
   * Keeps a handle on the Doctrine registry and registers Composer's error handler.
   *
   * @param RegistryInterface $doctrine registry later used to fetch the entity manager and repositories
   */
  public function __construct(RegistryInterface $doctrine)
  {
      ErrorHandler::register();

      $this->doctrine = $doctrine;
  }
  /**
   * Update a project
   *
   * For repositories hosted on github.com the first usable maintainer token is
   * registered on $io. The packages exposed by the repository are then sorted
   * (dev versions last), each one is created/updated as a Version, versions that
   * are no longer exposed are soft-deleted (or removed once they have been
   * soft-deleted for more than a day), and finally the readme / GitHub metadata
   * and the package timestamps are refreshed.
   *
   * @param IOInterface $io used for GitHub authentication and verbose output
   * @param Config $config composer configuration handed to the RemoteFilesystem
   * @param \Packagist\WebBundle\Entity\Package $package
   * @param RepositoryInterface $repository the repository instance used to update from
   * @param int $flags a few of the constants of this class
   * @param \DateTime $start reference time used to compute the pruning cut-off (one day earlier); defaults to now
   * @return Package the package instance managed by the entity manager once the update is done
   * @throws \RuntimeException if no driver could be established for a VcsRepository
   * @throws InvalidRepositoryException if the repository reported invalid branches
   */
  public function update(IOInterface $io, Config $config, Package $package, RepositoryInterface $repository, $flags = 0, \DateTime $start = null): Package
  {
      $rfs = new RemoteFilesystem($io, $config);

      if (null === $start) {
          $start = new \DateTime();
      }
      $deleteDate = clone $start;
      $deleteDate->modify('-1day');

      $em = $this->doctrine->getManager();
      $apc = extension_loaded('apcu');
      $rootIdentifier = null;
      // several steps below only exist on VcsRepository, so remember the check once
      $isVcsRepository = $repository instanceof VcsRepository;

      if ($isVcsRepository) {
          $cfg = $repository->getRepoConfig();
          if (isset($cfg['url']) && preg_match('{\bgithub\.com\b}', $cfg['url'])) {
              foreach ($package->getMaintainers() as $maintainer) {
                  if (!($newGithubToken = $maintainer->getGithubToken())) {
                      continue;
                  }

                  $valid = null;
                  if ($apc) {
                      $valid = apcu_fetch('is_token_valid_'.$maintainer->getUsernameCanonical());
                  }

                  if (true !== $valid) {
                      // the token goes in the Authorization header: GitHub no longer accepts
                      // it as an access_token query parameter, and this keeps it out of URLs
                      $context = stream_context_create(['http' => ['header' => [
                          'User-agent: packagist-token-check',
                          'Authorization: token '.$newGithubToken,
                      ]]]);
                      // reset so a failed connection cannot reuse the headers of a previous iteration
                      $http_response_header = [];
                      $rate = json_decode(@file_get_contents('https://api.github.com/rate_limit', false, $context), true);
                      $statusLine = $http_response_header[0] ?? '';

                      // invalid/outdated token, wipe it so we don't try it again
                      if (!$rate && (false !== strpos($statusLine, '403') || false !== strpos($statusLine, '401'))) {
                          $maintainer->setGithubToken(null);
                          $em->flush($maintainer);
                          continue;
                      }
                  }

                  if ($apc) {
                      apcu_store('is_token_valid_'.$maintainer->getUsernameCanonical(), true, 86400);
                  }

                  $io->setAuthentication('github.com', $newGithubToken, 'x-oauth-basic');
                  break;
              }
          }

          if (!$repository->getDriver()) {
              throw new \RuntimeException('Driver could not be established for package '.$package->getName().' ('.$package->getRepository().')');
          }

          $rootIdentifier = $repository->getDriver()->getRootIdentifier();
      }

      $versions = $repository->getPackages();

      usort($versions, static function ($a, $b) {
          $aVersion = $a->getVersion();
          $bVersion = $b->getVersion();
          if ($aVersion === '9999999-dev' || 'dev-' === substr($aVersion, 0, 4)) {
              $aVersion = 'dev';
          }
          if ($bVersion === '9999999-dev' || 'dev-' === substr($bVersion, 0, 4)) {
              $bVersion = 'dev';
          }
          $aIsDev = $aVersion === 'dev' || substr($aVersion, -4) === '-dev';
          $bIsDev = $bVersion === 'dev' || substr($bVersion, -4) === '-dev';

          // push dev versions to the end
          if ($aIsDev !== $bIsDev) {
              return $aIsDev ? 1 : -1;
          }

          // equal versions are sorted by date
          if ($aVersion === $bVersion) {
              return $a->getReleaseDate() > $b->getReleaseDate() ? 1 : -1;
          }

          // the rest is sorted by version
          return version_compare($aVersion, $bVersion);
      });

      $versionRepository = $this->doctrine->getRepository('PackagistWebBundle:Version');

      if ($flags & self::DELETE_BEFORE) {
          foreach ($package->getVersions() as $version) {
              $versionRepository->remove($version);
          }

          $em->flush();
          $em->refresh($package);
      }

      $existingVersions = $versionRepository->getVersionMetadataForUpdate($package);

      $processedVersions = [];
      $idsToMarkUpdated = [];
      foreach ($versions as $version) {
          if ($version instanceof AliasPackage) {
              continue;
          }

          $versionKey = strtolower($version->getVersion());
          if (isset($processedVersions[$versionKey])) {
              $io->write('Skipping version '.$version->getPrettyVersion().' (duplicate of '.$processedVersions[$versionKey]->getPrettyVersion().')', true, IOInterface::VERBOSE);
              continue;
          }
          $processedVersions[$versionKey] = $version;

          $result = $this->updateInformation($versionRepository, $package, $existingVersions, $version, $flags, $rootIdentifier);
          if ($result['updated']) {
              // flush and clear after every changed version, then re-attach the package
              $em->flush();
              $em->clear();
              $package = $em->merge($package);
          } else {
              $idsToMarkUpdated[] = $result['id'];
          }

          // mark the version processed so we can prune leftover ones
          unset($existingVersions[$result['version']]);
      }

      // mark versions that did not update as updated to avoid them being pruned
      if ($idsToMarkUpdated) {
          $em->getConnection()->executeUpdate(
              'UPDATE package_version SET updatedAt = :now, softDeletedAt = NULL WHERE id IN (:ids)',
              ['now' => date('Y-m-d H:i:s'), 'ids' => $idsToMarkUpdated],
              ['ids' => Connection::PARAM_INT_ARRAY]
          );
      }

      // remove outdated versions
      foreach ($existingVersions as $version) {
          if (!is_null($version['softDeletedAt']) && new \DateTime($version['softDeletedAt']) < $deleteDate) {
              $versionRepository->remove($versionRepository->findOneById($version['id']));
          } elseif (is_null($version['softDeletedAt'])) {
              // set it to be soft-deleted so next update that occurs after deleteDate (1day) if the
              // version is still missing it will be really removed; an existing timestamp is kept
              // as is, otherwise frequent updates would postpone the removal forever
              $em->getConnection()->executeUpdate(
                  'UPDATE package_version SET softDeletedAt = :now WHERE id = :id',
                  ['now' => date('Y-m-d H:i:s'), 'id' => $version['id']]
              );
          }
      }

      // readme and GitHub metadata can only be read through a VCS driver
      if ($isVcsRepository) {
          if (preg_match('{^(?:git://|git@|https?://)github.com[:/]([^/]+)/(.+?)(?:\.git|/)?$}i', $package->getRepository(), $match)) {
              $this->updateGitHubInfo($rfs, $package, $match[1], $match[2], $repository);
          } else {
              $this->updateReadme($io, $package, $repository);
          }
      }

      $package->setUpdatedAt(new \DateTime);
      $package->setCrawledAt(new \DateTime);
      $em->flush();

      if ($isVcsRepository && $repository->hadInvalidBranches()) {
          throw new InvalidRepositoryException('Some branches contained invalid data and were discarded, it is advised to review the log and fix any issues present in branches');
      }

      return $package;
  }
  /**
   * Creates or refreshes the Version entity matching one composer package.
   *
   * An already stored version is left untouched (and reported as not updated)
   * when its source reference is unchanged and UPDATE_EQUAL_REFS is not set.
   *
   * @param VersionRepository $versionRepo repository used to load the stored version
   * @param Package $package package the version belongs to; its description and type may be updated
   * @param array $existingVersions stored version metadata keyed by lower-cased normalized version
   * @param PackageInterface $data composer package to read the new values from
   * @param int $flags a few of the constants of this class
   * @param string|null $rootIdentifier root identifier of the repository driver, null if unknown
   * @return array with keys:
   * - updated (whether the version was updated or needs to be marked as updated)
   * - id (version id, can be null for newly created versions)
   * - version (normalized version from the composer package)
   * - object (Version instance if it was updated)
   */
  private function updateInformation(VersionRepository $versionRepo, Package $package, array $existingVersions, PackageInterface $data, $flags, $rootIdentifier)
  {
      $entityManager = $this->doctrine->getManager();
      $version = new Version();

      $normalizedVersion = $data->getVersion();
      $versionKey = strtolower($normalizedVersion);

      $stored = $existingVersions[$versionKey] ?? null;
      if ($stored) {
          $source = $stored['source'];
          // nothing to do unless the reference moved (re-tag or new commit on branch) or an update is forced
          if ($source['reference'] === $data->getSourceReference() && !($flags & self::UPDATE_EQUAL_REFS)) {
              return ['updated' => false, 'id' => $stored['id'], 'version' => $versionKey, 'object' => null];
          }

          $version = $versionRepo->findOneById($stored['id']);
      }

      $version->setName($package->getName());
      $version->setVersion($data->getPrettyVersion());
      $version->setNormalizedVersion($normalizedVersion);
      $version->setDevelopment($data->isDev());

      $entityManager->persist($version);

      $description = $this->sanitize($data->getDescription());
      $version->setDescription($description);

      // only the default branch is allowed to overwrite the package description
      $isDefaultBranch = $rootIdentifier === null
          || preg_replace('{dev-|(\.x)?-dev}', '', $version->getVersion()) === $rootIdentifier;
      if ($isDefaultBranch) {
          $package->setDescription($description);
      }

      $version->setHomepage($data->getHomepage());
      $version->setLicense($data->getLicense() ?: []);

      $version->setPackage($package);
      $version->setUpdatedAt(new \DateTime);
      $version->setSoftDeletedAt(null);
      $version->setReleasedAt($data->getReleaseDate());

      if (!$data->getSourceType()) {
          $version->setSource(null);
      } else {
          // $source may already hold the stored source of the existing version; its keys are overwritten in place
          $source['type'] = $data->getSourceType();
          $source['url'] = $data->getSourceUrl();
          $source['reference'] = $data->getSourceReference();
          $version->setSource($source);
      }

      if (!$data->getDistType()) {
          $version->setDist(null);
      } else {
          $version->setDist([
              'type' => $data->getDistType(),
              'url' => $data->getDistUrl(),
              'reference' => $data->getDistReference(),
              'shasum' => $data->getDistSha1Checksum(),
          ]);
      }

      if ($data->getType()) {
          $type = $this->sanitize($data->getType());
          $version->setType($type);
          if ($package->getType() !== $type) {
              $package->setType($type);
          }
      }

      $version->setTargetDir($data->getTargetDir());
      $version->setAutoload($data->getAutoload());
      $version->setExtra($data->getExtra());
      $version->setBinaries($data->getBinaries());
      $version->setIncludePaths($data->getIncludePaths());
      $version->setSupport($data->getSupport());

      // tags: keep the ones still listed as keywords, add the new ones, drop the rest
      if ($data->getKeywords()) {
          $keywordsByKey = [];
          foreach ($data->getKeywords() as $keyword) {
              $keywordsByKey[mb_strtolower($keyword, 'UTF-8')] = $keyword;
          }

          $obsoleteTags = [];
          foreach ($version->getTags() as $tag) {
              $obsoleteTags[mb_strtolower($tag->getName(), 'UTF-8')] = $tag;
          }

          foreach ($keywordsByKey as $tagKey => $keyword) {
              if (isset($obsoleteTags[$tagKey])) {
                  // still wanted, so it must not be removed below
                  unset($obsoleteTags[$tagKey]);
                  continue;
              }

              $tag = Tag::getByName($entityManager, $keyword, true);
              if (!$version->getTags()->contains($tag)) {
                  $version->addTag($tag);
              }
          }

          foreach ($obsoleteTags as $tag) {
              $version->getTags()->removeElement($tag);
          }
      } elseif (count($version->getTags())) {
          $version->getTags()->clear();
      }

      // authors are rebuilt from scratch on every update
      $authorFields = ['email', 'name', 'homepage', 'role'];
      $authorRepository = $this->doctrine->getRepository('PackagistWebBundle:Author');

      $version->getAuthors()->clear();
      if ($data->getAuthors()) {
          foreach ($data->getAuthors() as $authorData) {
              // normalise every field to a trimmed non-empty string or null
              foreach ($authorFields as $field) {
                  $value = isset($authorData[$field]) ? trim($authorData[$field]) : null;
                  $authorData[$field] = '' === $value ? null : $value;
              }

              // skip authors with no information
              if (!(isset($authorData['email']) || isset($authorData['name']))) {
                  continue;
              }

              $author = $authorRepository->findOneBy([
                  'email' => $authorData['email'],
                  'name' => $authorData['name'],
                  'homepage' => $authorData['homepage'],
                  'role' => $authorData['role'],
              ]);

              if (!$author) {
                  $author = new Author();
                  $entityManager->persist($author);
              }

              foreach ($authorFields as $field) {
                  if (isset($authorData[$field])) {
                      $author->{'set'.$field}($authorData[$field]);
                  }
              }

              // only update the author timestamp once a month at most as the value is kinda unused
              if ($author->getUpdatedAt() === null || $author->getUpdatedAt()->getTimestamp() < time() - 86400 * 30) {
                  $author->setUpdatedAt(new \DateTime);
              }

              if (!$version->getAuthors()->contains($author)) {
                  $version->addAuthor($author);
              }
          }
      }

      // handle links
      foreach ($this->supportedLinkTypes as $linkType => $opts) {
          $wanted = [];
          foreach ($data->{$opts['method']}() as $link) {
              $constraint = $link->getPrettyConstraint();
              if (false !== strpos($constraint, ',') && false !== strpos($constraint, '@')) {
                  $constraint = preg_replace_callback('{([><]=?\s*[^@]+?)@([a-z]+)}i', function ($matches) {
                      return $matches[2] === 'stable' ? $matches[1] : $matches[1].'-'.$matches[2];
                  }, $constraint);
              }

              $wanted[$link->getTarget()] = $constraint;
          }

          foreach ($version->{'get'.$linkType}() as $link) {
              $target = $link->getPackageName();
              if (isset($wanted[$target]) && $wanted[$target] === $link->getPackageVersion()) {
                  // clear those that are already set
                  unset($wanted[$target]);
                  continue;
              }

              // clear links that have changed/disappeared (for updates)
              $version->{'get'.$linkType}()->removeElement($link);
              $entityManager->remove($link);
          }

          $class = 'Packagist\WebBundle\Entity\\'.$opts['entity'];
          foreach ($wanted as $linkPackageName => $linkPackageVersion) {
              $link = new $class;
              $link->setPackageName($linkPackageName);
              $link->setPackageVersion($linkPackageVersion);
              $version->{'add'.$linkType.'Link'}($link);
              $link->setVersion($version);
              $entityManager->persist($link);
          }
      }

      // handle suggests
      $suggests = $data->getSuggests();
      if ($suggests) {
          foreach ($version->getSuggest() as $link) {
              $target = $link->getPackageName();
              if (isset($suggests[$target]) && $suggests[$target] === $link->getPackageVersion()) {
                  // clear those that are already set
                  unset($suggests[$target]);
                  continue;
              }

              // clear links that have changed/disappeared (for updates)
              $version->getSuggest()->removeElement($link);
              $entityManager->remove($link);
          }

          foreach ($suggests as $linkPackageName => $linkPackageVersion) {
              $link = new SuggestLink;
              $link->setPackageName($linkPackageName);
              $link->setPackageVersion($linkPackageVersion);
              $version->addSuggestLink($link);
              $link->setVersion($version);
              $entityManager->persist($link);
          }
      } elseif (count($version->getSuggest())) {
          // clear existing suggests if present
          foreach ($version->getSuggest() as $link) {
              $entityManager->remove($link);
          }
          $version->getSuggest()->clear();
      }

      return ['updated' => true, 'id' => $version->getId(), 'version' => $versionKey, 'object' => $version];
  }
  /**
   * Refresh the readme of $package from the root identifier of $repository.
   *
   * The readme file name comes from the "readme" key of the composer information
   * and falls back to README.md. Files ending in .txt (or without any extension)
   * are stored as escaped preformatted text, .md files are rendered through the
   * markdown parser; other extensions are ignored. Any exception is only reported
   * on $io at verbose level.
   *
   * @param IOInterface $io
   * @param Package $package
   * @param VcsRepository $repository
   */
  private function updateReadme(IOInterface $io, Package $package, VcsRepository $repository)
  {
      try {
          $driver = $repository->getDriver();
          $composerInfo = $driver->getComposerInformation($driver->getRootIdentifier());
          $readmeFile = $composerInfo['readme'] ?? 'README.md';

          // a name without any dot yields the whole name here and is treated as plain text
          $ext = substr($readmeFile, strrpos($readmeFile, '.'));
          if ($readmeFile === $ext) {
              $ext = '.txt';
          }

          if ('.txt' === $ext) {
              $source = $driver->getFileContent($readmeFile, $driver->getRootIdentifier());
              if (!empty($source)) {
                  $package->setReadme('<pre>' . htmlspecialchars($source) . '</pre>');
              }
          } elseif ('.md' === $ext) {
              $source = $driver->getFileContent($readmeFile, $driver->getRootIdentifier());
              $parser = new GithubMarkdown();
              $readme = $parser->parse($source);
              if (!empty($readme)) {
                  $package->setReadme($this->prepareReadme($readme));
              }
          }
      } catch (\Exception $e) {
          // we ignore all errors for this minor function
          $io->write('Can not update readme. Error: ' . $e->getMessage(), true, IOInterface::VERBOSE);
      }
  }
  /**
   * Copies the rendered readme and the repository statistics from GitHub onto $package.
   *
   * Does nothing unless the repository driver is a GitHubDriver. A failure to
   * download the readme aborts silently, except for a 404 transport error which
   * only means there is no readme and lets the statistics be stored anyway.
   *
   * @param RemoteFilesystem $rfs used to download the rendered readme
   * @param Package $package
   * @param string $owner GitHub repository owner
   * @param string $repo GitHub repository name
   * @param VcsRepository $repository
   */
  private function updateGitHubInfo(RemoteFilesystem $rfs, Package $package, $owner, $repo, VcsRepository $repository)
  {
      $driver = $repository->getDriver();
      if (!$driver instanceof GitHubDriver) {
          return;
      }

      $repoData = $driver->getRepoData();

      $readme = null;
      try {
          $readmeUrl = 'https://api.github.com/repos/'.$owner.'/'.$repo.'/readme';
          $requestOptions = ['http' => ['header' => ['Accept: application/vnd.github.v3.html']]];
          $readme = $rfs->getContents('github.com', $readmeUrl, false, $requestOptions);
      } catch (\Exception $e) {
          // 404s just mean no readme present so we proceed with the rest
          $readmeIsMissing = $e instanceof \Composer\Downloader\TransportException && 404 === $e->getCode();
          if (!$readmeIsMissing) {
              return;
          }
      }

      if (!empty($readme)) {
          $package->setReadme($this->prepareReadme($readme, true, $owner, $repo));
      }

      if (!empty($repoData['language'])) {
          $package->setLanguage($repoData['language']);
      }

      // counters are copied whenever the API response contains them
      $counterSetters = [
          'stargazers_count' => 'setGitHubStars',
          'subscribers_count' => 'setGitHubWatches',
          'network_count' => 'setGitHubForks',
          'open_issues_count' => 'setGitHubOpenIssues',
      ];
      foreach ($counterSetters as $key => $setter) {
          if (isset($repoData[$key])) {
              $package->{$setter}($repoData[$key]);
          }
      }
  }
  /**
   * Prepare the readme by stripping elements and attributes that are not supported.
   *
   * The HTML is purified against a whitelist, links are marked as untrusted,
   * in-page anchors get the "user-content-" prefix, relative links and images of
   * GitHub readmes are made absolute, and a leading <h1>/<h2> is dropped.
   *
   * @param string $readme readme rendered as HTML
   * @param bool $isGithub whether the HTML comes from the GitHub readme API
   * @param string|null $owner GitHub repository owner, used to build absolute URLs
   * @param string|null $repo GitHub repository name, used to build absolute URLs
   * @return string the cleaned HTML fragment (content of the <body> element)
   */
  private function prepareReadme($readme, $isGithub = false, $owner = null, $repo = null)
  {
      $elements = [
          'p',
          'br',
          'small',
          'strong', 'b',
          'em', 'i',
          'strike',
          'sub', 'sup',
          'ins', 'del',
          'ol', 'ul', 'li',
          'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
          'dl', 'dd', 'dt',
          'pre', 'code', 'samp', 'kbd',
          'q', 'blockquote', 'abbr', 'cite',
          'table', 'thead', 'tbody', 'th', 'tr', 'td',
          'a', 'span',
          'img',
          'details', 'summary',
      ];

      $attributes = [
          'img.src', 'img.title', 'img.alt', 'img.width', 'img.height', 'img.style',
          'a.href', 'a.target', 'a.rel', 'a.id',
          'td.colspan', 'td.rowspan', 'th.colspan', 'th.rowspan',
          '*.class', 'details.open',
      ];

      // detect base path if the github readme is located in a subfolder like docs/README.md
      $basePath = '';
      if ($isGithub && preg_match('{^<div id="readme" [^>]+?data-path="([^"]+)"}', $readme, $match) && false !== strpos($match[1], '/')) {
          $basePath = dirname($match[1]);
      }
      if ($basePath) {
          $basePath .= '/';
      }

      $config = \HTMLPurifier_Config::createDefault();
      $config->set('HTML.AllowedElements', implode(',', $elements));
      $config->set('HTML.AllowedAttributes', implode(',', $attributes));
      $config->set('Attr.EnableID', true);
      $config->set('Attr.AllowedFrameTargets', ['_blank']);

      // add custom HTML tag definitions
      $def = $config->getHTMLDefinition(true);
      $def->addElement('details', 'Block', 'Flow', 'Common', [
          'open' => 'Bool#open',
      ]);
      $def->addElement('summary', 'Inline', 'Inline', 'Common');

      $purifier = new \HTMLPurifier($config);
      $readme = $purifier->purify($readme);

      // libxml warns about tags it does not know (e.g. the whitelisted <details> and
      // <summary>); collect those errors internally so that they can not surface as
      // PHP warnings, which the error handler registered in the constructor may escalate
      $previousLibxmlState = libxml_use_internal_errors(true);
      $dom = new \DOMDocument();
      $dom->loadHTML('<?xml encoding="UTF-8">' . $readme);
      libxml_clear_errors();
      libxml_use_internal_errors($previousLibxmlState);

      // Links can not be trusted, mark them nofollow and convert relative to absolute links
      $links = $dom->getElementsByTagName('a');
      foreach ($links as $link) {
          $link->setAttribute('rel', 'nofollow noindex noopener external');
          $href = $link->getAttribute('href');
          if ('#' === substr($href, 0, 1)) {
              $link->setAttribute('href', '#user-content-'.substr($href, 1));
          } elseif ('mailto:' === substr($href, 0, 7)) {
              // do nothing
          } elseif ($isGithub && false === strpos($href, '//')) {
              $link->setAttribute(
                  'href',
                  'https://github.com/'.$owner.'/'.$repo.'/blob/HEAD/'.$basePath.$href
              );
          }
      }

      if ($isGithub) {
          // convert relative to absolute images
          $images = $dom->getElementsByTagName('img');
          foreach ($images as $img) {
              $src = $img->getAttribute('src');
              if (false === strpos($src, '//')) {
                  $img->setAttribute(
                      'src',
                      'https://raw.github.com/'.$owner.'/'.$repo.'/HEAD/'.$basePath.$src
                  );
              }
          }
      }

      // remove first page element if it's a <h1> or <h2>, because it's usually
      // the project name or the `README` string which we don't need
      $first = $dom->getElementsByTagName('body')->item(0);
      if ($first) {
          $first = $first->childNodes->item(0);
      }

      if ($first && ('h1' === $first->nodeName || 'h2' === $first->nodeName)) {
          $first->parentNode->removeChild($first);
      }

      $readme = $dom->saveHTML();

      // keep only what sits between <body> and the last </body>; without a body
      // element (e.g. nothing survived the purification) there is nothing to return
      $bodyStart = strpos($readme, '<body>');
      $bodyEnd = strrpos($readme, '</body>');
      if (false === $bodyStart || false === $bodyEnd) {
          return '';
      }
      $readme = substr($readme, $bodyStart + 6, $bodyEnd - $bodyStart - 6);

      return str_replace("\r\n", "\n", $readme);
  }
  /**
   * Strips terminal escape sequences and low control characters from a string.
   *
   * First every ESC byte (0x1B), together with a directly following "[" plus one
   * character when present, is removed; then every character in the range
   * 0x01-0x1A (which includes tab and line breaks) is removed.
   *
   * @param string $str
   * @return string
   */
  private function sanitize($str)
  {
      // remove escape chars
      $withoutEscapes = preg_replace("{\x1B(?:\[.)?}u", '', $str);

      $withoutControlChars = preg_replace("{[\x01-\x1A]}u", '', $withoutEscapes);

      return $withoutControlChars;
  }
  609. }