Browse Source

add readme support for non github repos

Thomas Flori 8 years ago
parent
commit
68a5012a7f
3 changed files with 209 additions and 72 deletions
  1. 2 1
      composer.json
  2. 62 2
      composer.lock
  3. 145 69
      src/Packagist/WebBundle/Package/Updater.php

+ 2 - 1
composer.json

@@ -54,7 +54,8 @@
         "pagerfanta/pagerfanta": "^1.0",
         "knplabs/knp-menu-bundle": "^2.1",
         "ezyang/htmlpurifier": "^4.6",
-        "nelmio/cors-bundle": "^1.4"
+        "nelmio/cors-bundle": "^1.4",
+        "cebe/markdown": "^1.1"
     },
     "_comment": ["fos user bundle 2.0.0 tag needed"],
     "require-dev": {

+ 62 - 2
composer.lock

@@ -4,9 +4,69 @@
         "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
         "This file is @generated automatically"
     ],
-    "hash": "1205c19862d30b1e5135fea732809a72",
-    "content-hash": "ca089bd944842e20cdbdf556dad7c4f0",
+    "hash": "3d7826bcd5b94b1502cbb55942e6a2de",
+    "content-hash": "7c0bb648a819dea55af72094112d1666",
     "packages": [
+        {
+            "name": "cebe/markdown",
+            "version": "1.1.1",
+            "source": {
+                "type": "git",
+                "url": "https://github.com/cebe/markdown.git",
+                "reference": "c30eb5e01fe021cc5bba2f9ee0eeef96d4931166"
+            },
+            "dist": {
+                "type": "zip",
+                "url": "https://api.github.com/repos/cebe/markdown/zipball/c30eb5e01fe021cc5bba2f9ee0eeef96d4931166",
+                "reference": "c30eb5e01fe021cc5bba2f9ee0eeef96d4931166",
+                "shasum": ""
+            },
+            "require": {
+                "lib-pcre": "*",
+                "php": ">=5.4.0"
+            },
+            "require-dev": {
+                "cebe/indent": "*",
+                "facebook/xhprof": "*@dev",
+                "phpunit/phpunit": "4.1.*"
+            },
+            "bin": [
+                "bin/markdown"
+            ],
+            "type": "library",
+            "extra": {
+                "branch-alias": {
+                    "dev-master": "1.1.x-dev"
+                }
+            },
+            "autoload": {
+                "psr-4": {
+                    "cebe\\markdown\\": ""
+                }
+            },
+            "notification-url": "https://packagist.org/downloads/",
+            "license": [
+                "MIT"
+            ],
+            "authors": [
+                {
+                    "name": "Carsten Brandt",
+                    "email": "mail@cebe.cc",
+                    "homepage": "http://cebe.cc/",
+                    "role": "Creator"
+                }
+            ],
+            "description": "A super fast, highly extensible markdown parser for PHP",
+            "homepage": "https://github.com/cebe/markdown#readme",
+            "keywords": [
+                "extensible",
+                "fast",
+                "gfm",
+                "markdown",
+                "markdown-extra"
+            ],
+            "time": "2016-09-14 20:40:20"
+        },
         {
             "name": "composer/ca-bundle",
             "version": "1.0.4",

+ 145 - 69
src/Packagist/WebBundle/Package/Updater.php

@@ -12,6 +12,7 @@
 
 namespace Packagist\WebBundle\Package;
 
+use cebe\markdown\GithubMarkdown;
 use Composer\Package\AliasPackage;
 use Composer\Package\PackageInterface;
 use Composer\Repository\RepositoryInterface;
@@ -214,6 +215,8 @@ class Updater
 
         if (preg_match('{^(?:git://|git@|https?://)github.com[:/]([^/]+)/(.+?)(?:\.git|/)?$}i', $package->getRepository(), $match) && $repository instanceof VcsRepository) {
             $this->updateGitHubInfo($rfs, $package, $match[1], $match[2], $repository);
+        } else {
+            $this->updateReadme($io, $package, $repository);
         }
 
         $package->setUpdatedAt(new \DateTime);
@@ -456,6 +459,56 @@ class Updater
         return true;
     }
 
+    /**
+     * Update the readme of $package from the file named by the "readme" key of
+     * the root composer.json of $repository.
+     *
+     * Only VcsRepository instances are processed; any other repository is ignored.
+     * Files without an extension, dot-files and ".txt" files are HTML-escaped and
+     * wrapped in <pre>. ".md" and ".markdown" files are rendered with
+     * GithubMarkdown and cleaned with prepareReadme(). The extension is matched
+     * case-insensitively; any other extension leaves the package untouched.
+     *
+     * Exceptions are never propagated: they are reported on $io at verbose level.
+     *
+     * @param IOInterface $io
+     * @param Package $package
+     * @param RepositoryInterface $repository
+     */
+    private function updateReadme(IOInterface $io, Package $package, RepositoryInterface $repository)
+    {
+        // The caller reaches this method whenever the GitHub branch is not taken,
+        // which includes repositories that are not a VcsRepository. A strict
+        // VcsRepository type hint would turn those calls into a type error.
+        if (!$repository instanceof VcsRepository) {
+            return;
+        }
+
+        try {
+            $driver = $repository->getDriver();
+            // NOTE(review): presumably getDriver() can yield no driver for an
+            // unreachable repository - confirm against VcsRepository.
+            if (!$driver) {
+                return;
+            }
+
+            $identifier = $driver->getRootIdentifier();
+            $composerInfo = $driver->getComposerInformation($identifier);
+
+            if (!isset($composerInfo['readme']) || !is_string($composerInfo['readme']) || '' === $composerInfo['readme']) {
+                return;
+            }
+
+            $readmeFile = $composerInfo['readme'];
+
+            // Look at the file name only, so a dot in a directory name is not
+            // mistaken for the extension. No dot at all, or a leading dot
+            // (".readme"), means plain text.
+            $basename = basename($readmeFile);
+            $dotPos = strrpos($basename, '.');
+            if (false === $dotPos || 0 === $dotPos) {
+                $ext = '.txt';
+            } else {
+                $ext = strtolower(substr($basename, $dotPos));
+            }
+
+            // Decide before fetching so unsupported formats cost no driver call.
+            if (!in_array($ext, array('.txt', '.md', '.markdown'), true)) {
+                return;
+            }
+
+            $source = $driver->getFileContent($readmeFile, $identifier);
+            // NOTE(review): assumes a missing file is reported as a non-string
+            // (e.g. null) - confirm against the driver. Either way an empty
+            // result must not overwrite the readme with an empty <pre>.
+            if (!is_string($source) || '' === $source) {
+                return;
+            }
+
+            if ('.txt' === $ext) {
+                // ENT_SUBSTITUTE keeps the text when it contains invalid UTF-8
+                // instead of htmlspecialchars() returning an empty string.
+                $escaped = htmlspecialchars($source, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
+                $package->setReadme('<pre>' . $escaped . '</pre>');
+
+                return;
+            }
+
+            $parser = new GithubMarkdown();
+            $readme = $parser->parse($source);
+
+            if (!empty($readme)) {
+                $package->setReadme($this->prepareReadme($readme));
+            }
+        } catch (\Exception $e) {
+            // we ignore all errors for this minor function
+            $io->write(
+                'Can not update readme. Error: ' . $e->getMessage(),
+                true,
+                IOInterface::VERBOSE
+            );
+        }
+    }
+
     private function updateGitHubInfo(RemoteFilesystem $rfs, Package $package, $owner, $repo, VcsRepository $repository)
     {
         $baseApiUrl = 'https://api.github.com/repos/'.$owner.'/'.$repo;
@@ -478,75 +531,7 @@ class Updater
         }
 
         if (!empty($readme)) {
-            $elements = array(
-                'p',
-                'br',
-                'small',
-                'strong', 'b',
-                'em', 'i',
-                'strike',
-                'sub', 'sup',
-                'ins', 'del',
-                'ol', 'ul', 'li',
-                'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
-                'dl', 'dd', 'dt',
-                'pre', 'code', 'samp', 'kbd',
-                'q', 'blockquote', 'abbr', 'cite',
-                'table', 'thead', 'tbody', 'th', 'tr', 'td',
-                'a', 'span',
-                'img',
-            );
-            $config = \HTMLPurifier_Config::createDefault();
-            $config->set('HTML.AllowedElements', implode(',', $elements));
-            $config->set('HTML.AllowedAttributes',
-                'img.src,img.title,img.alt,img.width,img.height,img.style,'.
-                'a.href,a.target,a.rel,a.id,'.
-                'td.colspan,td.rowspan,th.colspan,th.rowspan,'.
-                '*.class'
-            );
-            $config->set('Attr.EnableID', true);
-            $config->set('Attr.AllowedFrameTargets', ['_blank']);
-            $purifier = new \HTMLPurifier($config);
-            $readme = $purifier->purify($readme);
-
-            $dom = new \DOMDocument();
-            $dom->loadHTML('<?xml encoding="UTF-8">' . $readme);
-
-            // Links can not be trusted, mark them nofollow and convert relative to absolute links
-            $links = $dom->getElementsByTagName('a');
-            foreach ($links as $link) {
-                $link->setAttribute('rel', 'nofollow noopener external');
-                if ('#' === substr($link->getAttribute('href'), 0, 1)) {
-                    $link->setAttribute('href', '#user-content-'.substr($link->getAttribute('href'), 1));
-                } elseif ('mailto:' === substr($link->getAttribute('href'), 0, 7)) {
-                    // do nothing
-                } elseif (false === strpos($link->getAttribute('href'), '//')) {
-                    $link->setAttribute('href', 'https://github.com/'.$owner.'/'.$repo.'/blob/HEAD/'.$link->getAttribute('href'));
-                }
-            }
-
-            // convert relative to absolute images
-            $images = $dom->getElementsByTagName('img');
-            foreach ($images as $img) {
-                if (false === strpos($img->getAttribute('src'), '//')) {
-                    $img->setAttribute('src', 'https://raw.github.com/'.$owner.'/'.$repo.'/HEAD/'.$img->getAttribute('src'));
-                }
-            }
-
-            // remove first title as it's usually the project name which we don't need
-            if ($dom->getElementsByTagName('h1')->length) {
-                $first = $dom->getElementsByTagName('h1')->item(0);
-                $first->parentNode->removeChild($first);
-            } elseif ($dom->getElementsByTagName('h2')->length) {
-                $first = $dom->getElementsByTagName('h2')->item(0);
-                $first->parentNode->removeChild($first);
-            }
-
-            $readme = $dom->saveHTML();
-            $readme = substr($readme, strpos($readme, '<body>')+6);
-            $readme = substr($readme, 0, strrpos($readme, '</body>'));
-
-            $package->setReadme($readme);
+            $package->setReadme($this->prepareReadme($readme, true, $owner, $repo));
         }
 
         if (!empty($repoData['language'])) {
@@ -566,6 +551,97 @@ class Updater
         }
     }
 
+    /**
+     * Sanitize rendered readme HTML and normalise its links for display.
+     *
+     * The markup is filtered through HTMLPurifier using a whitelist of elements
+     * and attributes, then post-processed with DOMDocument:
+     *  - every link gets rel="nofollow noopener external",
+     *  - hrefs starting with "#" are prefixed with "user-content-",
+     *  - the first <h1> (or, if there is none, the first <h2>) is removed.
+     * When $isGithub is true, link and image URLs that contain no "//" are also
+     * rewritten to absolute github.com / raw.github.com URLs built from $owner
+     * and $repo.
+     *
+     * @param string $readme HTML to clean up
+     * @param bool $isGithub whether URLs without "//" are resolved against GitHub
+     * @param string|null $owner GitHub owner, only used when $isGithub is true
+     * @param string|null $repo GitHub repository name, only used when $isGithub is true
+     * @return string the markup found between <body> and </body> after processing
+     */
+    private function prepareReadme($readme, $isGithub = false, $owner = null, $repo = null)
+    {
+        $allowedElements = [
+            'p', 'br', 'small',
+            'strong', 'b', 'em', 'i', 'strike',
+            'sub', 'sup', 'ins', 'del',
+            'ol', 'ul', 'li',
+            'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
+            'dl', 'dd', 'dt',
+            'pre', 'code', 'samp', 'kbd',
+            'q', 'blockquote', 'abbr', 'cite',
+            'table', 'thead', 'tbody', 'th', 'tr', 'td',
+            'a', 'span', 'img',
+        ];
+
+        $allowedAttributes =
+            'img.src,img.title,img.alt,img.width,img.height,img.style,'.
+            'a.href,a.target,a.rel,a.id,'.
+            'td.colspan,td.rowspan,th.colspan,th.rowspan,'.
+            '*.class';
+
+        // Directives are applied in this order.
+        $directives = [
+            'HTML.AllowedElements' => implode(',', $allowedElements),
+            'HTML.AllowedAttributes' => $allowedAttributes,
+            'Attr.EnableID' => true,
+            'Attr.AllowedFrameTargets' => ['_blank'],
+        ];
+
+        $config = \HTMLPurifier_Config::createDefault();
+        foreach ($directives as $directive => $value) {
+            $config->set($directive, $value);
+        }
+
+        $purifier = new \HTMLPurifier($config);
+        $clean = $purifier->purify($readme);
+
+        // The XML declaration makes DOMDocument read the fragment as UTF-8.
+        $document = new \DOMDocument();
+        $document->loadHTML('<?xml encoding="UTF-8">' . $clean);
+
+        // Links can not be trusted: mark them nofollow and rewrite their targets.
+        foreach ($document->getElementsByTagName('a') as $anchor) {
+            $anchor->setAttribute('rel', 'nofollow noopener external');
+            $href = $anchor->getAttribute('href');
+
+            if ('#' === substr($href, 0, 1)) {
+                $anchor->setAttribute('href', '#user-content-'.substr($href, 1));
+                continue;
+            }
+
+            if ('mailto:' === substr($href, 0, 7)) {
+                // mail links are left as they are
+                continue;
+            }
+
+            if ($isGithub && false === strpos($href, '//')) {
+                $anchor->setAttribute('href', 'https://github.com/'.$owner.'/'.$repo.'/blob/HEAD/'.$href);
+            }
+        }
+
+        // Image sources are only rewritten for GitHub repositories.
+        if ($isGithub) {
+            foreach ($document->getElementsByTagName('img') as $image) {
+                $src = $image->getAttribute('src');
+
+                if (false === strpos($src, '//')) {
+                    $image->setAttribute('src', 'https://raw.github.com/'.$owner.'/'.$repo.'/HEAD/'.$src);
+                }
+            }
+        }
+
+        // Drop the first title, usually the project name: the first h1 if any
+        // exists, otherwise the first h2.
+        foreach (['h1', 'h2'] as $tagName) {
+            $headings = $document->getElementsByTagName($tagName);
+
+            if ($headings->length) {
+                $title = $headings->item(0);
+                $title->parentNode->removeChild($title);
+                break;
+            }
+        }
+
+        // Keep only what sits between <body> and </body> in the serialised document.
+        $html = $document->saveHTML();
+        $bodyStart = strpos($html, '<body>') + 6;
+        $html = substr($html, $bodyStart);
+        $bodyEnd = strrpos($html, '</body>');
+
+        return substr($html, 0, $bodyEnd);
+    }
+
     private function sanitize($str)
     {
         // remove escape chars