14 rokov pred · 97d983035b
--- a/README.md
+++ b/README.md
@@ -21,16 +21,7 @@ The search index uses [Solr](http://lucene.apache.org/solr/), so you will have t
 
				 If you are running it on a non-standard host or port, you will have to adjust the configuration. See the
			
 
				 [NelmioSolariumBundle](https://github.com/nelmio/NelmioSolariumBundle) for more details.
			
 
				 
			
 
				-You will also have to configure Solr. The standard `schema.xml` already covers most fields like `title` and
			
 
				-`description`. The following need to be added though:
			
 
				-
			
 
				-    <fields>
			
 
				-        ...
			
 
				-
			
 
				-        <field name="tags" type="text_general" indexed="true" stored="true" multiValued="true"/>
			
 
				-
			
 
				-        ....
			
 
				-    </fields>
			
 
				+You will also have to configure Solr. Use the `schema.xml` provided in the doc/ directory for that.
			
 
				 
			
 
				 To index packages, just run `app/console packagist:index`. It is recommended to set up a cron job for
			
 
				 this command, and have it run every few minutes.
			
--- a/doc/schema.xml
+++ b/doc/schema.xml
@@ -0,0 +1,272 @@
 
				+<?xml version="1.0" encoding="UTF-8" ?>

			
 
				+<schema name="packagist" version="1.4">

			
 
				+  <types>

			
 
				+    <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->

			
 
				+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>

			
 
				+    <!-- boolean type: "true" or "false" -->

			
 
				+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>

			
 
				+    <!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings. -->

			
 
				+    <fieldType name="binary" class="solr.BinaryField"/>

			
 
				+

			
 
				+    <!-- The optional sortMissingLast and sortMissingFirst attributes are

			
 
				+         currently supported on types that are sorted internally as strings

			
 
				+         and on numeric types.

			
 
				+	       This includes "string","boolean", and, as of 3.5 (and 4.x),

			
 
				+	       int, float, long, date, double, including the "Trie" variants.

			
 
				+       - If sortMissingLast="true", then a sort on this field will cause documents

			
 
				+         without the field to come after documents with the field,

			
 
				+         regardless of the requested sort order (asc or desc).

			
 
				+       - If sortMissingFirst="true", then a sort on this field will cause documents

			
 
				+         without the field to come before documents with the field,

			
 
				+         regardless of the requested sort order.

			
 
				+       - If sortMissingLast="false" and sortMissingFirst="false" (the default),

			
 
				+         then default lucene sorting will be used which places docs without the

			
 
				+         field first in an ascending sort and last in a descending sort.

			
 
				+    -->    

			
 
				+

			
 
				+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>

			
 
				+    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>

			
 
				+    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>

			
 
				+    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>

			
 
				+

			
 
				+    <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>

			
 
				+

			
 
				+    <!-- A Trie based date field for faster date range queries and date faceting. -->

			
 
				+    <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>

			
 
				+

			
 
				+    <!-- An edge-ngram'd text field that can be used for wildcard matching -->

			
 
				+    <fieldType name="text_edgengram" class="solr.TextField" positionIncrementGap="100">

			
 
				+        <analyzer type="index">

			
 
				+            <tokenizer class="solr.KeywordTokenizerFactory"/>

			
 
				+            <filter class="solr.LowerCaseFilterFactory"/>

			
 
				+            <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="100" />

			
 
				+        </analyzer>

			
 
				+        <analyzer type="query">

			
 
				+            <tokenizer class="solr.KeywordTokenizerFactory"/>

			
 
				+            <filter class="solr.LowerCaseFilterFactory"/>

			
 
				+        </analyzer>

			
 
				+    </fieldType>

			
 
				+

			
 
				+    <!-- A general text field that has reasonable, generic

			
 
				+         cross-language defaults: it tokenizes with StandardTokenizer,

			
 
				+	 removes stop words from case-insensitive "stopwords.txt"

			
 
				+	 (empty by default), and down cases.  At query time only, it

			
 
				+	 also applies synonyms. -->

			
 
				+    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">

			
 
				+      <analyzer type="index">

			
 
				+        <tokenizer class="solr.StandardTokenizerFactory"/>

			
 
				+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />

			
 
				+        <filter class="solr.LowerCaseFilterFactory"/>

			
 
				+      </analyzer>

			
 
				+      <analyzer type="query">

			
 
				+        <tokenizer class="solr.StandardTokenizerFactory"/>

			
 
				+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />

			
 
				+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>

			
 
				+        <filter class="solr.LowerCaseFilterFactory"/>

			
 
				+      </analyzer>

			
 
				+    </fieldType>

			
 
				+

			
 
				+    <!-- Just like text_general except it reverses the characters of

			
 
				+	 each token, to enable more efficient leading wildcard queries. -->

			
 
				+    <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">

			
 
				+      <analyzer type="index">

			
 
				+        <tokenizer class="solr.StandardTokenizerFactory"/>

			
 
				+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />

			
 
				+        <filter class="solr.LowerCaseFilterFactory"/>

			
 
				+        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"

			
 
				+           maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>

			
 
				+      </analyzer>

			
 
				+      <analyzer type="query">

			
 
				+        <tokenizer class="solr.StandardTokenizerFactory"/>

			
 
				+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>

			
 
				+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />

			
 
				+        <filter class="solr.LowerCaseFilterFactory"/>

			
 
				+      </analyzer>

			
 
				+    </fieldType>

			
 
				+

			
 
				+    <!-- A text field with defaults appropriate for English: it

			
 
				+         tokenizes with StandardTokenizer, removes English stop words

			
 
				+         (stopwords_en.txt), down cases, protects words from protwords.txt, and

			
 
				+         finally applies Porter's stemming.  The query time analyzer

			
 
				+         also applies synonyms from synonyms.txt. -->

			
 
				+    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">

			
 
				+      <analyzer type="index">

			
 
				+        <tokenizer class="solr.StandardTokenizerFactory"/>

			
 
				+        <!-- in this example, we will only use synonyms at query time

			
 
				+        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>

			
 
				+        -->

			
 
				+        <!-- Case insensitive stop word removal.

			
 
				+          add enablePositionIncrements=true in both the index and query

			
 
				+          analyzers to leave a 'gap' for more accurate phrase queries.

			
 
				+        -->

			
 
				+        <filter class="solr.StopFilterFactory"

			
 
				+                ignoreCase="true"

			
 
				+                words="stopwords_en.txt"

			
 
				+                enablePositionIncrements="true"

			
 
				+                />

			
 
				+        <filter class="solr.LowerCaseFilterFactory"/>

			
 
				+        <filter class="solr.EnglishPossessiveFilterFactory"/>

			
 
				+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>

			
 
				+	<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:

			
 
				+        <filter class="solr.EnglishMinimalStemFilterFactory"/>

			
 
				+	-->

			
 
				+        <filter class="solr.PorterStemFilterFactory"/>

			
 
				+      </analyzer>

			
 
				+      <analyzer type="query">

			
 
				+        <tokenizer class="solr.StandardTokenizerFactory"/>

			
 
				+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>

			
 
				+        <filter class="solr.StopFilterFactory"

			
 
				+                ignoreCase="true"

			
 
				+                words="stopwords_en.txt"

			
 
				+                enablePositionIncrements="true"

			
 
				+                />

			
 
				+        <filter class="solr.LowerCaseFilterFactory"/>

			
 
				+        <filter class="solr.EnglishPossessiveFilterFactory"/>

			
 
				+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>

			
 
				+	<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:

			
 
				+        <filter class="solr.EnglishMinimalStemFilterFactory"/>

			
 
				+	-->

			
 
				+        <filter class="solr.PorterStemFilterFactory"/>

			
 
				+      </analyzer>

			
 
				+    </fieldType>

			
 
				+

			
 
				+    <!-- A text field with defaults appropriate for English, plus

			
 
				+	 aggressive word-splitting and autophrase features enabled.

			
 
				+	 This field is just like text_en, except it adds

			
 
				+	 WordDelimiterFilter to enable splitting and matching of

			
 
				+	 words on case-change, alpha numeric boundaries, and

			
 
				+	 non-alphanumeric chars.  This means certain compound word

			
 
				+	 cases will work, for example query "wi fi" will match

			
 
				+	 document "WiFi" or "wi-fi".  However, other cases will still

			
 
				+	 not match, for example if the query is "wifi" and the

			
 
				+	 document is "wi fi" or if the query is "wi-fi" and the

			
 
				+	 document is "wifi".

			
 
				+        -->

			
 
				+    <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">

			
 
				+      <analyzer type="index">

			
 
				+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>

			
 
				+        <!-- in this example, we will only use synonyms at query time

			
 
				+        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>

			
 
				+        -->

			
 
				+        <!-- Case insensitive stop word removal.

			
 
				+          add enablePositionIncrements=true in both the index and query

			
 
				+          analyzers to leave a 'gap' for more accurate phrase queries.

			
 
				+        -->

			
 
				+        <filter class="solr.StopFilterFactory"

			
 
				+                ignoreCase="true"

			
 
				+                words="stopwords_en.txt"

			
 
				+                enablePositionIncrements="true"

			
 
				+                />

			
 
				+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>

			
 
				+        <filter class="solr.LowerCaseFilterFactory"/>

			
 
				+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>

			
 
				+        <filter class="solr.PorterStemFilterFactory"/>

			
 
				+      </analyzer>

			
 
				+      <analyzer type="query">

			
 
				+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>

			
 
				+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>

			
 
				+        <filter class="solr.StopFilterFactory"

			
 
				+                ignoreCase="true"

			
 
				+                words="stopwords_en.txt"

			
 
				+                enablePositionIncrements="true"

			
 
				+                />

			
 
				+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>

			
 
				+        <filter class="solr.LowerCaseFilterFactory"/>

			
 
				+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>

			
 
				+        <filter class="solr.PorterStemFilterFactory"/>

			
 
				+      </analyzer>

			
 
				+    </fieldType>

			
 
				+

			
 
				+    <!-- Less flexible matching, but fewer false matches.  Probably not ideal for product names,

			
 
				+         but may be good for SKUs.  Can insert dashes in the wrong place and still match. -->

			
 
				+    <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">

			
 
				+      <analyzer>

			
 
				+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>

			
 
				+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>

			
 
				+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>

			
 
				+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>

			
 
				+        <filter class="solr.LowerCaseFilterFactory"/>

			
 
				+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>

			
 
				+        <filter class="solr.EnglishMinimalStemFilterFactory"/>

			
 
				+        <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes

			
 
				+             possible with WordDelimiterFilter in conjunction with stemming. -->

			
 
				+        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>

			
 
				+      </analyzer>

			
 
				+    </fieldType>

			
 
				+

			
 
				+    <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >

			
 
				+      <analyzer>

			
 
				+        <tokenizer class="solr.StandardTokenizerFactory"/>

			
 
				+        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>

			
 
				+      </analyzer>

			
 
				+    </fieldType>

			
 
				+

			
 
				+    <!-- since fields of this type are by default not stored or indexed,

			
 
				+         any data added to them will be ignored outright.  -->

			
 
				+    <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />

			
 
				+ </types>

			
 
				+

			
 
				+ <fields>

			
 
				+   <!-- Valid attributes for fields:

			
 
				+     name: mandatory - the name for the field

			
 
				+     type: mandatory - the name of a previously defined type from the 

			
 
				+       <types> section

			
 
				+     indexed: true if this field should be indexed (searchable or sortable)

			
 
				+     stored: true if this field should be retrievable

			
 
				+     multiValued: true if this field may contain multiple values per document

			
 
				+     omitNorms: (expert) set to true to omit the norms associated with

			
 
				+       this field (this disables length normalization and index-time

			
 
				+       boosting for the field, and saves some memory).  Only full-text

			
 
				+       fields or fields that need an index-time boost need norms.

			
 
				+     termVectors: [false] set to true to store the term vector for a

			
 
				+       given field.

			
 
				+       When using MoreLikeThis, fields used for similarity should be

			
 
				+       stored for best performance.

			
 
				+     termPositions: Store position information with the term vector.  

			
 
				+       This will increase storage costs.

			
 
				+     termOffsets: Store offset information with the term vector. This 

			
 
				+       will increase storage costs.

			
 
				+     default: a value that should be used if no value is specified

			
 
				+       when adding a document.

			
 
				+   -->

			
 
				+

			
 
				+   <field name="id" type="string" indexed="true" stored="true" required="true" /> 

			
 
				+   <field name="name" type="text_general_rev" indexed="true" stored="true"/>

			
 
				+   <field name="description" type="text_general_rev" indexed="true" stored="true"/>

			
 
				+   <field name="tags" type="text_general_rev" indexed="true" stored="true" multiValued="true"/>

			
 
				+   

			
 
				+   <!-- catchall field, containing all other searchable text fields (implemented

			
 
				+        via copyField further on in this schema) -->

			
 
				+   <field name="text" type="text_en" indexed="true" stored="false" multiValued="true"/>

			
 
				+   

			
 
				+   <!-- extra name field allowing dashes to be omitted/misplaced -->

			
 
				+   <field name="name_split" type="text_en_splitting_tight" indexed="true" stored="false" />

			
 
				+

			
 
				+   <!-- extra catchall for ngram searches -->

			
 
				+   <field name="text_ngram" type="text_edgengram" indexed="true" stored="false" multiValued="true" />

			
 
				+ </fields>

			
 
				+

			
 
				+ <!-- Field to use to determine and enforce document uniqueness. 

			
 
				+      Unless this field is marked with required="false", it will be a required field

			
 
				+   -->

			
 
				+ <uniqueKey>id</uniqueKey>

			
 
				+

			
 
				+ <!-- field for the QueryParser to use when an explicit fieldname is absent -->

			
 
				+ <defaultSearchField>text</defaultSearchField>

			
 
				+

			
 
				+ <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->

			
 
				+ <solrQueryParser defaultOperator="OR"/>

			
 
				+  <!-- copyField commands copy one field to another at the time a document

			
 
				+        is added to the index.  It's used either to index the same field differently,

			
 
				+        or to add multiple fields to the same field for easier/faster searching.  -->

			
 
				+

			
 
				+   <copyField source="name" dest="text"/>

			
 
				+   <copyField source="description" dest="text"/>

			
 
				+   <copyField source="tags" dest="text"/>

			
 
				+

			
 
				+   <copyField source="name" dest="name_split"/>

			
 
				+

			
 
				+   <copyField source="name" dest="text_ngram"/>

			
 
				+   <copyField source="description" dest="text_ngram"/>

			
 
				+   <copyField source="tags" dest="text_ngram"/>

			
 
				+</schema>