Updater.php 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680
  1. <?php
  2. /*
  3. * This file is part of Packagist.
  4. *
  5. * (c) Jordi Boggiano <j.boggiano@seld.be>
  6. * Nils Adermann <naderman@naderman.de>
  7. *
  8. * For the full copyright and license information, please view the LICENSE
  9. * file that was distributed with this source code.
  10. */
  11. namespace Packagist\WebBundle\Package;
  12. use cebe\markdown\GithubMarkdown;
  13. use Composer\Package\AliasPackage;
  14. use Composer\Package\PackageInterface;
  15. use Composer\Repository\RepositoryInterface;
  16. use Composer\Repository\VcsRepository;
  17. use Composer\Repository\Vcs\GitHubDriver;
  18. use Composer\Repository\InvalidRepositoryException;
  19. use Composer\Util\ErrorHandler;
  20. use Composer\Util\RemoteFilesystem;
  21. use Composer\Config;
  22. use Composer\IO\IOInterface;
  23. use Packagist\WebBundle\Entity\Author;
  24. use Packagist\WebBundle\Entity\Package;
  25. use Packagist\WebBundle\Entity\Tag;
  26. use Packagist\WebBundle\Entity\Version;
  27. use Packagist\WebBundle\Entity\VersionRepository;
  28. use Packagist\WebBundle\Entity\SuggestLink;
  29. use Symfony\Bridge\Doctrine\RegistryInterface;
  30. use Doctrine\DBAL\Connection;
  31. /**
  32. * @author Jordi Boggiano <j.boggiano@seld.be>
  33. */
  34. class Updater
  35. {
  36. const UPDATE_EQUAL_REFS = 1;
  37. const DELETE_BEFORE = 2;
  38. /**
  39. * Doctrine
  40. * @var RegistryInterface
  41. */
  42. protected $doctrine;
  43. /**
  44. * Supported link types
  45. * @var array
  46. */
  47. protected $supportedLinkTypes = array(
  48. 'require' => array(
  49. 'method' => 'getRequires',
  50. 'entity' => 'RequireLink',
  51. ),
  52. 'conflict' => array(
  53. 'method' => 'getConflicts',
  54. 'entity' => 'ConflictLink',
  55. ),
  56. 'provide' => array(
  57. 'method' => 'getProvides',
  58. 'entity' => 'ProvideLink',
  59. ),
  60. 'replace' => array(
  61. 'method' => 'getReplaces',
  62. 'entity' => 'ReplaceLink',
  63. ),
  64. 'devRequire' => array(
  65. 'method' => 'getDevRequires',
  66. 'entity' => 'DevRequireLink',
  67. ),
  68. );
  /**
   * Keeps the Doctrine registry and registers Composer's error handler.
   *
   * @param RegistryInterface $doctrine registry the updater uses to obtain managers and repositories
   */
  public function __construct(RegistryInterface $doctrine)
  {
    // Composer's ErrorHandler is installed for everything this updater runs afterwards
    ErrorHandler::register();

    $this->doctrine = $doctrine;
  }
  /**
   * Update a package from the versions found in its repository.
   *
   * Every non-alias version is created or refreshed through updateInformation().
   * Versions that are no longer present get soft-deleted, and are removed once they
   * have been soft-deleted since before one day prior to $start. Finally the readme
   * (and, for GitHub repositories, the GitHub metadata) is refreshed.
   *
   * @param IOInterface $io IO used for verbose output and to receive GitHub credentials
   * @param Config $config Composer config handed to the RemoteFilesystem
   * @param \Packagist\WebBundle\Entity\Package $package
   * @param RepositoryInterface $repository the repository instance used to update from
   * @param int $flags a few of the constants of this class
   * @param \DateTime $start reference time for pruning soft-deleted versions, defaults to now
   *
   * @throws InvalidRepositoryException when the VCS repository reported invalid branches
   */
  public function update(IOInterface $io, Config $config, Package $package, RepositoryInterface $repository, $flags = 0, \DateTime $start = null)
  {
    $rfs = new RemoteFilesystem($io, $config);

    if (null === $start) {
      $start = new \DateTime();
    }
    // versions soft-deleted before this moment are removed for good
    $deleteDate = clone $start;
    $deleteDate->modify('-1day');

    $em = $this->doctrine->getManager();

    $rootIdentifier = null;
    if ($repository instanceof VcsRepository) {
      $cfg = $repository->getRepoConfig();
      if (isset($cfg['url']) && preg_match('{\bgithub\.com\b}', $cfg['url'])) {
        $this->authenticateWithMaintainerToken($io, $package);
      }
      $rootIdentifier = $repository->getDriver()->getRootIdentifier();
    }

    $versions = $repository->getPackages();
    usort($versions, static function ($a, $b) {
      $aVersion = $a->getVersion();
      $bVersion = $b->getVersion();
      if ($aVersion === '9999999-dev' || 'dev-' === substr($aVersion, 0, 4)) {
        $aVersion = 'dev';
      }
      if ($bVersion === '9999999-dev' || 'dev-' === substr($bVersion, 0, 4)) {
        $bVersion = 'dev';
      }
      $aIsDev = $aVersion === 'dev' || substr($aVersion, -4) === '-dev';
      $bIsDev = $bVersion === 'dev' || substr($bVersion, -4) === '-dev';

      // push dev versions to the end
      if ($aIsDev !== $bIsDev) {
        return $aIsDev ? 1 : -1;
      }

      // equal versions are sorted by date; <=> also yields 0 for identical dates so the
      // comparator stays consistent
      if ($aVersion === $bVersion) {
        return $a->getReleaseDate() <=> $b->getReleaseDate();
      }

      // the rest is sorted by version
      return version_compare($aVersion, $bVersion);
    });

    $versionRepository = $this->doctrine->getRepository('PackagistWebBundle:Version');

    if ($flags & self::DELETE_BEFORE) {
      foreach ($package->getVersions() as $version) {
        $versionRepository->remove($version);
      }

      $em->flush();
      $em->refresh($package);
    }

    $existingVersions = $versionRepository->getVersionMetadataForUpdate($package);

    $lastProcessed = null;
    $idsToMarkUpdated = [];
    foreach ($versions as $version) {
      if ($version instanceof AliasPackage) {
        continue;
      }

      if ($lastProcessed && $lastProcessed->getVersion() === $version->getVersion()) {
        $io->write('Skipping version '.$version->getPrettyVersion().' (duplicate of '.$lastProcessed->getPrettyVersion().')', true, IOInterface::VERBOSE);
        continue;
      }
      $lastProcessed = $version;

      $result = $this->updateInformation($versionRepository, $package, $existingVersions, $version, $flags, $rootIdentifier);
      if ($result['updated']) {
        // flush and detach after each changed version, then re-attach the package
        $em->flush();
        $em->clear();
        $package = $em->merge($package);
      } else {
        $idsToMarkUpdated[] = $result['id'];
      }

      // mark the version processed so we can prune leftover ones
      unset($existingVersions[$result['version']]);
    }

    // mark versions that did not update as updated to avoid them being pruned
    if ($idsToMarkUpdated) {
      $em->getConnection()->executeUpdate(
        'UPDATE package_version SET updatedAt = :now, softDeletedAt = NULL WHERE id IN (:ids)',
        ['now' => date('Y-m-d H:i:s'), 'ids' => $idsToMarkUpdated],
        ['ids' => Connection::PARAM_INT_ARRAY]
      );
    }

    // remove outdated versions
    foreach ($existingVersions as $version) {
      if (!is_null($version['softDeletedAt']) && new \DateTime($version['softDeletedAt']) < $deleteDate) {
        $versionRepository->remove($versionRepository->findOneById($version['id']));
      } else {
        // set it to be soft-deleted so next update that occurs after deleteDate (1day) if the
        // version is still missing it will be really removed
        $em->getConnection()->executeUpdate(
          'UPDATE package_version SET softDeletedAt = :now WHERE id = :id',
          ['now' => date('Y-m-d H:i:s'), 'id' => $version['id']]
        );
      }
    }

    // readme and GitHub metadata can only be read through a VCS driver
    if ($repository instanceof VcsRepository) {
      if (preg_match('{^(?:git://|git@|https?://)github.com[:/]([^/]+)/(.+?)(?:\.git|/)?$}i', $package->getRepository(), $match)) {
        $this->updateGitHubInfo($rfs, $package, $match[1], $match[2], $repository);
      } else {
        $this->updateReadme($io, $package, $repository);
      }
    }

    $package->setUpdatedAt(new \DateTime);
    $package->setCrawledAt(new \DateTime);
    $em->flush();

    if ($repository instanceof VcsRepository && $repository->hadInvalidBranches()) {
      throw new InvalidRepositoryException('Some branches contained invalid data and were discarded, it is advised to review the log and fix any issues present in branches');
    }
  }

  /**
   * Hands the first usable maintainer GitHub token to $io for github.com requests.
   *
   * A token that GitHub rejects is wiped from its maintainer so it is not tried again;
   * when APCu is loaded, an accepted token is remembered as valid for a day.
   *
   * @param IOInterface $io
   * @param Package $package
   */
  private function authenticateWithMaintainerToken(IOInterface $io, Package $package)
  {
    $em = $this->doctrine->getManager();
    $apc = extension_loaded('apcu');

    foreach ($package->getMaintainers() as $maintainer) {
      $token = $maintainer->getGithubToken();
      if (!$token) {
        continue;
      }

      $cacheKey = 'is_token_valid_'.$maintainer->getUsernameCanonical();
      $valid = $apc ? apcu_fetch($cacheKey) : null;

      // invalid/outdated token, wipe it so we don't try it again
      if (true !== $valid && $this->isGitHubTokenRejected($token)) {
        $maintainer->setGithubToken(null);
        $em->flush($maintainer);
        continue;
      }

      if ($apc) {
        apcu_store($cacheKey, true, 86400);
      }
      $io->setAuthentication('github.com', $token, 'x-oauth-basic');
      break;
    }
  }

  /**
   * Tells whether GitHub answers a rate-limit request for $token with 401 or 403.
   *
   * The token travels in the Authorization header rather than the URL. Any other
   * failure (network error, unexpected status) is not treated as a rejection.
   *
   * @param string $token
   * @return bool
   */
  private function isGitHubTokenRejected($token)
  {
    $context = stream_context_create(['http' => ['header' => [
      'User-agent: packagist-token-check',
      'Authorization: token '.$token,
    ]]]);

    $rate = json_decode((string) @file_get_contents('https://api.github.com/rate_limit', false, $context), true);
    if ($rate) {
      return false;
    }

    // $http_response_header is only populated when an HTTP response was actually received
    $statusLine = isset($http_response_header[0]) ? $http_response_header[0] : '';

    return 1 === preg_match('{^HTTP/\S+\s+(?:401|403)\b}', $statusLine);
  }
  /**
   * Creates or refreshes the Version entity matching one composer package version.
   *
   * An existing version is left untouched when its stored source reference equals the
   * incoming one, unless the UPDATE_EQUAL_REFS flag is set.
   *
   * @param VersionRepository $versionRepo
   * @param Package $package
   * @param array $existingVersions version metadata keyed by lowercased normalized version
   * @param PackageInterface $data composer package the information is read from
   * @param int $flags a few of the constants of this class
   * @param string|null $rootIdentifier default branch identifier, null when unknown
   *
   * @return array with keys:
   * - updated (whether the version was updated or needs to be marked as updated)
   * - id (version id, can be null for newly created versions)
   * - version (normalized version from the composer package)
   * - object (Version instance if it was updated)
   */
  private function updateInformation(VersionRepository $versionRepo, Package $package, array $existingVersions, PackageInterface $data, $flags, $rootIdentifier)
  {
    $em = $this->doctrine->getManager();
    $version = new Version();

    $normVersion = $data->getVersion();
    $versionKey = strtolower($normVersion);

    $existingVersion = $existingVersions[$versionKey] ?? null;
    if ($existingVersion) {
      // the stored source may be null for versions that had no source
      $existingReference = $existingVersion['source']['reference'] ?? null;

      // update if the right flag is set, or the source reference has changed (re-tag or new commit on branch)
      if ($existingReference !== $data->getSourceReference() || ($flags & self::UPDATE_EQUAL_REFS)) {
        $version = $versionRepo->findOneById($existingVersion['id']);
      } else {
        return ['updated' => false, 'id' => $existingVersion['id'], 'version' => $versionKey, 'object' => null];
      }
    }

    $version->setName($package->getName());
    $version->setVersion($data->getPrettyVersion());
    $version->setNormalizedVersion($normVersion);
    $version->setDevelopment($data->isDev());

    $em->persist($version);

    $descr = $this->sanitize($data->getDescription());
    $version->setDescription($descr);

    // update the package description only for the default branch
    if ($rootIdentifier === null || preg_replace('{dev-|-dev}', '', $version->getVersion()) === $rootIdentifier) {
      $package->setDescription($descr);
    }

    $version->setHomepage($data->getHomepage());
    $version->setLicense($data->getLicense() ?: []);

    $version->setPackage($package);
    $version->setUpdatedAt(new \DateTime);
    $version->setSoftDeletedAt(null);
    $version->setReleasedAt($data->getReleaseDate());

    if ($data->getSourceType()) {
      $version->setSource([
        'type' => $data->getSourceType(),
        'url' => $data->getSourceUrl(),
        'reference' => $data->getSourceReference(),
      ]);
    } else {
      $version->setSource(null);
    }

    if ($data->getDistType()) {
      $version->setDist([
        'type' => $data->getDistType(),
        'url' => $data->getDistUrl(),
        'reference' => $data->getDistReference(),
        'shasum' => $data->getDistSha1Checksum(),
      ]);
    } else {
      $version->setDist(null);
    }

    if ($data->getType()) {
      $type = $this->sanitize($data->getType());
      $version->setType($type);
      if ($type !== $package->getType()) {
        $package->setType($type);
      }
    }

    $version->setTargetDir($data->getTargetDir());
    $version->setAutoload($data->getAutoload());
    $version->setExtra($data->getExtra());
    $version->setBinaries($data->getBinaries());
    $version->setIncludePaths($data->getIncludePaths());
    $version->setSupport($data->getSupport());

    $this->syncTags($version, $data);
    $this->syncAuthors($version, $data);
    $this->syncLinks($version, $data);
    $this->syncSuggests($version, $data);

    return ['updated' => true, 'id' => $version->getId(), 'version' => $versionKey, 'object' => $version];
  }

  /**
   * Makes the version's tags match the package keywords (compared case-insensitively).
   */
  private function syncTags(Version $version, PackageInterface $data)
  {
    $em = $this->doctrine->getManager();

    if ($data->getKeywords()) {
      $keywords = [];
      foreach ($data->getKeywords() as $keyword) {
        $keywords[mb_strtolower($keyword, 'UTF-8')] = $keyword;
      }

      $existingTags = [];
      foreach ($version->getTags() as $tag) {
        $existingTags[mb_strtolower($tag->getName(), 'UTF-8')] = $tag;
      }

      foreach ($keywords as $tagKey => $keyword) {
        // already attached: keep it and take it off the removal list
        if (isset($existingTags[$tagKey])) {
          unset($existingTags[$tagKey]);
          continue;
        }

        $tag = Tag::getByName($em, $keyword, true);
        if (!$version->getTags()->contains($tag)) {
          $version->addTag($tag);
        }
      }

      // whatever is left no longer appears in the keywords
      foreach ($existingTags as $tag) {
        $version->getTags()->removeElement($tag);
      }
    } elseif (count($version->getTags())) {
      $version->getTags()->clear();
    }
  }

  /**
   * Replaces the version's authors with the ones declared by the package.
   */
  private function syncAuthors(Version $version, PackageInterface $data)
  {
    $em = $this->doctrine->getManager();
    $authorRepository = $this->doctrine->getRepository('PackagistWebBundle:Author');
    $fields = ['email', 'name', 'homepage', 'role'];

    $version->getAuthors()->clear();
    if (!$data->getAuthors()) {
      return;
    }

    foreach ($data->getAuthors() as $authorData) {
      // normalize: trimmed value, or null when missing/blank
      foreach ($fields as $field) {
        if (isset($authorData[$field])) {
          $authorData[$field] = trim($authorData[$field]);
          if ('' === $authorData[$field]) {
            $authorData[$field] = null;
          }
        } else {
          $authorData[$field] = null;
        }
      }

      // skip authors with no information
      if (!isset($authorData['email']) && !isset($authorData['name'])) {
        continue;
      }

      $author = $authorRepository->findOneBy([
        'email' => $authorData['email'],
        'name' => $authorData['name'],
        'homepage' => $authorData['homepage'],
        'role' => $authorData['role'],
      ]);

      if (!$author) {
        $author = new Author();
        $em->persist($author);
      }

      foreach ($fields as $field) {
        if (isset($authorData[$field])) {
          $author->{'set'.$field}($authorData[$field]);
        }
      }

      // only update the author timestamp once a month at most as the value is kinda unused
      if ($author->getUpdatedAt() === null || $author->getUpdatedAt()->getTimestamp() < time() - 86400 * 30) {
        $author->setUpdatedAt(new \DateTime);
      }
      if (!$version->getAuthors()->contains($author)) {
        $version->addAuthor($author);
      }
    }
  }

  /**
   * Syncs the link entities of every supported link type with the package's links.
   */
  private function syncLinks(Version $version, PackageInterface $data)
  {
    $em = $this->doctrine->getManager();

    foreach ($this->supportedLinkTypes as $linkType => $opts) {
      $links = [];
      foreach ($data->{$opts['method']}() as $link) {
        $constraint = $link->getPrettyConstraint();
        if (false !== strpos($constraint, ',') && false !== strpos($constraint, '@')) {
          // fold "@stability" suffixes into the constraint: dropped for stable, "-<stability>" otherwise
          $constraint = preg_replace_callback('{([><]=?\s*[^@]+?)@([a-z]+)}i', function ($matches) {
            if ($matches[2] === 'stable') {
              return $matches[1];
            }

            return $matches[1].'-'.$matches[2];
          }, $constraint);
        }

        $links[$link->getTarget()] = $constraint;
      }

      foreach ($version->{'get'.$linkType}() as $link) {
        // clear links that have changed/disappeared (for updates)
        if (!isset($links[$link->getPackageName()]) || $links[$link->getPackageName()] !== $link->getPackageVersion()) {
          $version->{'get'.$linkType}()->removeElement($link);
          $em->remove($link);
        } else {
          // clear those that are already set
          unset($links[$link->getPackageName()]);
        }
      }

      foreach ($links as $linkPackageName => $linkPackageVersion) {
        $class = 'Packagist\WebBundle\Entity\\'.$opts['entity'];
        $link = new $class;
        $link->setPackageName($linkPackageName);
        $link->setPackageVersion($linkPackageVersion);
        $version->{'add'.$linkType.'Link'}($link);
        $link->setVersion($version);
        $em->persist($link);
      }
    }
  }

  /**
   * Syncs the version's suggest links with the package's suggestions.
   */
  private function syncSuggests(Version $version, PackageInterface $data)
  {
    $em = $this->doctrine->getManager();

    if ($suggests = $data->getSuggests()) {
      foreach ($version->getSuggest() as $link) {
        // clear links that have changed/disappeared (for updates)
        if (!isset($suggests[$link->getPackageName()]) || $suggests[$link->getPackageName()] !== $link->getPackageVersion()) {
          $version->getSuggest()->removeElement($link);
          $em->remove($link);
        } else {
          // clear those that are already set
          unset($suggests[$link->getPackageName()]);
        }
      }

      foreach ($suggests as $linkPackageName => $linkPackageVersion) {
        $link = new SuggestLink;
        $link->setPackageName($linkPackageName);
        $link->setPackageVersion($linkPackageVersion);
        $version->addSuggestLink($link);
        $link->setVersion($version);
        $em->persist($link);
      }
    } elseif (count($version->getSuggest())) {
      // clear existing suggests if present
      foreach ($version->getSuggest() as $link) {
        $em->remove($link);
      }
      $version->getSuggest()->clear();
    }
  }
  /**
   * Update the readme for $package from $repository.
   *
   * The readme path comes from the "readme" key of the root composer information and
   * falls back to README.md. Files with a "md" extension are rendered through
   * GithubMarkdown; files with a "txt" extension or no extension are escaped and wrapped
   * in <pre>. The extension is matched case-insensitively and taken from the file name
   * only, so dots in directory names do not count. Other extensions are ignored.
   *
   * Exceptions are reported on $io at verbose level and otherwise swallowed.
   *
   * @param IOInterface $io
   * @param Package $package
   * @param VcsRepository $repository
   */
  private function updateReadme(IOInterface $io, Package $package, VcsRepository $repository)
  {
    try {
      $driver = $repository->getDriver();
      $rootIdentifier = $driver->getRootIdentifier();
      $composerInfo = $driver->getComposerInformation($rootIdentifier);

      $readmeFile = $composerInfo['readme'] ?? 'README.md';
      if (!is_string($readmeFile) || '' === $readmeFile) {
        // a malformed "readme" value falls back to the default file name
        $readmeFile = 'README.md';
      }

      $ext = strtolower((string) pathinfo($readmeFile, PATHINFO_EXTENSION));
      if ('' === $ext) {
        // extension-less readmes are treated as plain text
        $ext = 'txt';
      }

      switch ($ext) {
        case 'txt':
          $source = $driver->getFileContent($readmeFile, $rootIdentifier);
          if (!empty($source)) {
            $package->setReadme('<pre>' . htmlspecialchars($source) . '</pre>');
          }
          break;

        case 'md':
          $source = $driver->getFileContent($readmeFile, $rootIdentifier);
          if (empty($source)) {
            // nothing to render, keep the current readme
            break;
          }
          $parser = new GithubMarkdown();
          $readme = $parser->parse($source);
          if (!empty($readme)) {
            $package->setReadme($this->prepareReadme($readme));
          }
          break;
      }
    } catch (\Exception $e) {
      // we ignore all errors for this minor function
      $io->write(
        'Can not update readme. Error: ' . $e->getMessage(),
        true,
        IOInterface::VERBOSE
      );
    }
  }
  /**
   * Refreshes the readme, language and counters of a package hosted on GitHub.
   *
   * Does nothing when the repository driver is not a GitHubDriver. When fetching the
   * readme fails with anything other than a 404 TransportException, the method returns
   * without touching the package.
   *
   * @param RemoteFilesystem $rfs
   * @param Package $package
   * @param string $owner
   * @param string $repo
   * @param VcsRepository $repository
   */
  private function updateGitHubInfo(RemoteFilesystem $rfs, Package $package, $owner, $repo, VcsRepository $repository)
  {
    $apiUrl = 'https://api.github.com/repos/'.$owner.'/'.$repo;

    $githubDriver = $repository->getDriver();
    if (!($githubDriver instanceof GitHubDriver)) {
      return;
    }

    $info = $githubDriver->getRepoData();

    $html = null;
    try {
      $requestOptions = ['http' => ['header' => ['Accept: application/vnd.github.v3.html']]];
      $html = $rfs->getContents('github.com', $apiUrl.'/readme', false, $requestOptions);
    } catch (\Exception $e) {
      // 404s just mean no readme present so we proceed with the rest
      $readmeMissing = $e instanceof \Composer\Downloader\TransportException && 404 === $e->getCode();
      if (!$readmeMissing) {
        return;
      }
    }

    if (!empty($html)) {
      $package->setReadme($this->prepareReadme($html, true, $owner, $repo));
    }

    if (!empty($info['language'])) {
      $package->setLanguage($info['language']);
    }

    // repo data key => package setter, applied in this order when the key is present
    $counters = [
      'stargazers_count' => 'setGitHubStars',
      'subscribers_count' => 'setGitHubWatches',
      'network_count' => 'setGitHubForks',
      'open_issues_count' => 'setGitHubOpenIssues',
    ];
    foreach ($counters as $key => $setter) {
      if (isset($info[$key])) {
        $package->{$setter}($info[$key]);
      }
    }
  }
  /**
   * Prepare the readme by stripping elements and attributes that are not supported.
   *
   * After purification every link is given a fixed rel attribute, in-page anchors are
   * prefixed with "user-content-", and (for GitHub readmes) relative links and images
   * are pointed at the repository on github.com. A leading <h1>/<h2> is dropped.
   *
   * @param string $readme HTML of the readme
   * @param bool $isGithub whether relative URLs should be resolved against GitHub
   * @param string|null $owner GitHub owner, used when $isGithub is true
   * @param string|null $repo GitHub repository name, used when $isGithub is true
   * @return string the content of the resulting <body>
   */
  private function prepareReadme($readme, $isGithub = false, $owner = null, $repo = null)
  {
    $allowedElements = [
      'p', 'br', 'small',
      'strong', 'b', 'em', 'i', 'strike',
      'sub', 'sup', 'ins', 'del',
      'ol', 'ul', 'li',
      'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
      'dl', 'dd', 'dt',
      'pre', 'code', 'samp', 'kbd',
      'q', 'blockquote', 'abbr', 'cite',
      'table', 'thead', 'tbody', 'th', 'tr', 'td',
      'a', 'span', 'img',
    ];
    $allowedAttributes = [
      'img.src', 'img.title', 'img.alt', 'img.width', 'img.height', 'img.style',
      'a.href', 'a.target', 'a.rel', 'a.id',
      'td.colspan', 'td.rowspan', 'th.colspan', 'th.rowspan',
      '*.class',
    ];

    $purifierConfig = \HTMLPurifier_Config::createDefault();
    $purifierConfig->set('HTML.AllowedElements', implode(',', $allowedElements));
    $purifierConfig->set('HTML.AllowedAttributes', implode(',', $allowedAttributes));
    $purifierConfig->set('Attr.EnableID', true);
    $purifierConfig->set('Attr.AllowedFrameTargets', ['_blank']);

    $htmlPurifier = new \HTMLPurifier($purifierConfig);
    $cleanHtml = $htmlPurifier->purify($readme);

    $document = new \DOMDocument();
    $document->loadHTML('<?xml encoding="UTF-8">' . $cleanHtml);

    // Links can not be trusted, mark them nofollow and convert relative to absolute links
    foreach ($document->getElementsByTagName('a') as $anchor) {
      $anchor->setAttribute('rel', 'nofollow noindex noopener external');
      $href = $anchor->getAttribute('href');

      if (0 === strpos($href, '#')) {
        $anchor->setAttribute('href', '#user-content-'.substr($href, 1));
        continue;
      }

      if (0 === strpos($href, 'mailto:')) {
        // mail links are left as they are
        continue;
      }

      if ($isGithub && false === strpos($href, '//')) {
        $anchor->setAttribute('href', 'https://github.com/'.$owner.'/'.$repo.'/blob/HEAD/'.$href);
      }
    }

    if ($isGithub) {
      // convert relative to absolute images
      foreach ($document->getElementsByTagName('img') as $image) {
        $src = $image->getAttribute('src');
        if (false === strpos($src, '//')) {
          $image->setAttribute('src', 'https://raw.github.com/'.$owner.'/'.$repo.'/HEAD/'.$src);
        }
      }
    }

    // remove first page element if it's a <h1> or <h2>, because it's usually
    // the project name or the `README` string which we don't need
    $body = $document->getElementsByTagName('body')->item(0);
    $leading = $body ? $body->childNodes->item(0) : null;
    if ($leading && in_array($leading->nodeName, ['h1', 'h2'], true)) {
      $leading->parentNode->removeChild($leading);
    }

    // keep only what sits between <body> and </body>
    $output = $document->saveHTML();
    $output = substr($output, strpos($output, '<body>') + 6);
    $output = substr($output, 0, strrpos($output, '</body>'));

    return str_replace("\r\n", "\n", $output);
  }
  /**
   * Strips terminal escape sequences and low control characters from a string.
   *
   * Removes a lone ESC (0x1B) or a whole "ESC [" control sequence (parameter bytes,
   * intermediate bytes and final byte), then every byte in the 0x01-0x1A range, which
   * includes tab, line feed and carriage return.
   *
   * @param string|null $str
   * @return string the cleaned string; '' for null input or when the input is not valid UTF-8
   */
  private function sanitize($str)
  {
    // remove escape chars: ESC alone, or ESC [ <params 0-?> <intermediates space-/> <final @-~>
    $clean = preg_replace('{\x1B(?:\[[0-?]*[ -/]*[@-~]?)?}u', '', (string) $str);
    if (null === $clean) {
      // preg_replace() yields null when the subject is not valid UTF-8
      return '';
    }

    $clean = preg_replace('{[\x01-\x1A]}u', '', $clean);

    return null === $clean ? '' : $clean;
  }
  589. }