|
@@ -0,0 +1,272 @@
|
|
|
|
+<?xml version="1.0" encoding="UTF-8" ?>
|
|
|
|
+<schema name="packagist" version="1.4">
|
|
|
|
+ <types>
|
|
|
|
+ <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
|
|
|
|
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
|
|
|
|
+ <!-- boolean type: "true" or "false" -->
|
|
|
|
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
|
|
|
|
+ <!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings. -->
|
|
|
|
+ <fieldType name="binary" class="solr.BinaryField"/>
|
|
|
|
+
|
|
|
|
+ <!-- The optional sortMissingLast and sortMissingFirst attributes are
|
|
|
|
+ currently supported on types that are sorted internally as strings
|
|
|
|
+ and on numeric types.
|
|
|
|
+ This includes "string","boolean", and, as of 3.5 (and 4.x),
|
|
|
|
+ int, float, long, date, double, including the "Trie" variants.
|
|
|
|
+ - If sortMissingLast="true", then a sort on this field will cause documents
|
|
|
|
+ without the field to come after documents with the field,
|
|
|
|
+ regardless of the requested sort order (asc or desc).
|
|
|
|
+ - If sortMissingFirst="true", then a sort on this field will cause documents
|
|
|
|
+ without the field to come before documents with the field,
|
|
|
|
+ regardless of the requested sort order.
|
|
|
|
+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
|
|
|
|
+ then default lucene sorting will be used which places docs without the
|
|
|
|
+ field first in an ascending sort and last in a descending sort.
|
|
|
|
+ -->
|
|
|
|
+
|
|
|
|
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
|
|
|
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
|
|
|
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
|
|
|
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
|
|
|
+
|
|
|
|
+ <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
|
|
|
|
+
|
|
|
|
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
|
|
|
|
+ <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
|
|
|
|
+
|
|
|
|
+ <!-- An edge-ngram'd text field that can be used for wildcard matching -->
|
|
|
|
+ <fieldType name="text_edgengram" class="solr.TextField" positionIncrementGap="100">
|
|
|
|
+ <analyzer type="index">
|
|
|
|
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
|
|
|
|
+ <filter class="solr.LowerCaseFilterFactory"/>
|
|
|
|
+ <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="100" />
|
|
|
|
+ </analyzer>
|
|
|
|
+ <analyzer type="query">
|
|
|
|
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
|
|
|
|
+ <filter class="solr.LowerCaseFilterFactory"/>
|
|
|
|
+ </analyzer>
|
|
|
|
+ </fieldType>
|
|
|
|
+
|
|
|
|
+ <!-- A general text field that has reasonable, generic
|
|
|
|
+ cross-language defaults: it tokenizes with StandardTokenizer,
|
|
|
|
+ removes stop words from case-insensitive "stopwords.txt"
|
|
|
|
+ (empty by default), and down cases. At query time only, it
|
|
|
|
+ also applies synonyms. -->
|
|
|
|
+ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
|
|
|
|
+ <analyzer type="index">
|
|
|
|
+ <tokenizer class="solr.StandardTokenizerFactory"/>
|
|
|
|
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
|
|
|
+ <filter class="solr.LowerCaseFilterFactory"/>
|
|
|
|
+ </analyzer>
|
|
|
|
+ <analyzer type="query">
|
|
|
|
+ <tokenizer class="solr.StandardTokenizerFactory"/>
|
|
|
|
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
|
|
|
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
|
|
|
+ <filter class="solr.LowerCaseFilterFactory"/>
|
|
|
|
+ </analyzer>
|
|
|
|
+ </fieldType>
|
|
|
|
+
|
|
|
|
+ <!-- Just like text_general except it reverses the characters of
|
|
|
|
+ each token, to enable more efficient leading wildcard queries. -->
|
|
|
|
+ <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
|
|
|
|
+ <analyzer type="index">
|
|
|
|
+ <tokenizer class="solr.StandardTokenizerFactory"/>
|
|
|
|
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
|
|
|
+ <filter class="solr.LowerCaseFilterFactory"/>
|
|
|
|
+ <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
|
|
|
+ maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
|
|
|
|
+ </analyzer>
|
|
|
|
+ <analyzer type="query">
|
|
|
|
+ <tokenizer class="solr.StandardTokenizerFactory"/>
|
|
|
|
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
|
|
|
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
|
|
|
+ <filter class="solr.LowerCaseFilterFactory"/>
|
|
|
|
+ </analyzer>
|
|
|
|
+ </fieldType>
|
|
|
|
+
|
|
|
|
+ <!-- A text field with defaults appropriate for English: it
|
|
|
|
+ tokenizes with StandardTokenizer, removes English stop words
|
|
|
|
+ (stopwords_en.txt), down cases, protects words from protwords.txt, and
|
|
|
|
+ finally applies Porter's stemming. The query time analyzer
|
|
|
|
+ also applies synonyms from synonyms.txt. -->
|
|
|
|
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
|
|
|
|
+ <analyzer type="index">
|
|
|
|
+ <tokenizer class="solr.StandardTokenizerFactory"/>
|
|
|
|
+ <!-- in this example, we will only use synonyms at query time
|
|
|
|
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
|
|
|
+ -->
|
|
|
|
+ <!-- Case insensitive stop word removal.
|
|
|
|
+ add enablePositionIncrements=true in both the index and query
|
|
|
|
+ analyzers to leave a 'gap' for more accurate phrase queries.
|
|
|
|
+ -->
|
|
|
|
+ <filter class="solr.StopFilterFactory"
|
|
|
|
+ ignoreCase="true"
|
|
|
|
+ words="stopwords_en.txt"
|
|
|
|
+ enablePositionIncrements="true"
|
|
|
|
+ />
|
|
|
|
+ <filter class="solr.LowerCaseFilterFactory"/>
|
|
|
|
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
|
|
|
|
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
|
|
|
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
|
|
|
|
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
|
|
|
+ -->
|
|
|
|
+ <filter class="solr.PorterStemFilterFactory"/>
|
|
|
|
+ </analyzer>
|
|
|
|
+ <analyzer type="query">
|
|
|
|
+ <tokenizer class="solr.StandardTokenizerFactory"/>
|
|
|
|
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
|
|
|
+ <filter class="solr.StopFilterFactory"
|
|
|
|
+ ignoreCase="true"
|
|
|
|
+ words="stopwords_en.txt"
|
|
|
|
+ enablePositionIncrements="true"
|
|
|
|
+ />
|
|
|
|
+ <filter class="solr.LowerCaseFilterFactory"/>
|
|
|
|
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
|
|
|
|
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
|
|
|
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
|
|
|
|
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
|
|
|
+ -->
|
|
|
|
+ <filter class="solr.PorterStemFilterFactory"/>
|
|
|
|
+ </analyzer>
|
|
|
|
+ </fieldType>
|
|
|
|
+
|
|
|
|
+ <!-- A text field with defaults appropriate for English, plus
|
|
|
|
+ aggressive word-splitting and autophrase features enabled.
|
|
|
|
+ This field is just like text_en, except it adds
|
|
|
|
+ WordDelimiterFilter to enable splitting and matching of
|
|
|
|
+ words on case-change, alpha numeric boundaries, and
|
|
|
|
+ non-alphanumeric chars. This means certain compound word
|
|
|
|
+ cases will work, for example query "wi fi" will match
|
|
|
|
+ document "WiFi" or "wi-fi". However, other cases will still
|
|
|
|
+ not match, for example if the query is "wifi" and the
|
|
|
|
+ document is "wi fi" or if the query is "wi-fi" and the
|
|
|
|
+ document is "wifi".
|
|
|
|
+ -->
|
|
|
|
+ <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
|
|
|
+ <analyzer type="index">
|
|
|
|
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
|
|
|
+ <!-- in this example, we will only use synonyms at query time
|
|
|
|
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
|
|
|
+ -->
|
|
|
|
+ <!-- Case insensitive stop word removal.
|
|
|
|
+ add enablePositionIncrements=true in both the index and query
|
|
|
|
+ analyzers to leave a 'gap' for more accurate phrase queries.
|
|
|
|
+ -->
|
|
|
|
+ <filter class="solr.StopFilterFactory"
|
|
|
|
+ ignoreCase="true"
|
|
|
|
+ words="stopwords_en.txt"
|
|
|
|
+ enablePositionIncrements="true"
|
|
|
|
+ />
|
|
|
|
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
|
|
|
+ <filter class="solr.LowerCaseFilterFactory"/>
|
|
|
|
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
|
|
|
+ <filter class="solr.PorterStemFilterFactory"/>
|
|
|
|
+ </analyzer>
|
|
|
|
+ <analyzer type="query">
|
|
|
|
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
|
|
|
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
|
|
|
+ <filter class="solr.StopFilterFactory"
|
|
|
|
+ ignoreCase="true"
|
|
|
|
+ words="stopwords_en.txt"
|
|
|
|
+ enablePositionIncrements="true"
|
|
|
|
+ />
|
|
|
|
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
|
|
|
+ <filter class="solr.LowerCaseFilterFactory"/>
|
|
|
|
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
|
|
|
+ <filter class="solr.PorterStemFilterFactory"/>
|
|
|
|
+ </analyzer>
|
|
|
|
+ </fieldType>
|
|
|
|
+
|
|
|
|
+ <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
|
|
|
|
+ but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
|
|
|
+ <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
|
|
|
+ <analyzer>
|
|
|
|
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
|
|
|
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
|
|
|
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
|
|
|
|
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
|
|
|
+ <filter class="solr.LowerCaseFilterFactory"/>
|
|
|
|
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
|
|
|
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
|
|
|
+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
|
|
|
+ possible with WordDelimiterFilter in conjunction with stemming. -->
|
|
|
|
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
|
|
|
+ </analyzer>
|
|
|
|
+ </fieldType>
|
|
|
|
+
|
|
|
|
+ <!-- Phonetic text type: tokens are encoded with Double Metaphone; inject="false" makes the phonetic codes replace the original tokens rather than being added alongside them. -->
|
|
|
|
+ <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
|
|
|
|
+ <analyzer>
|
|
|
|
+ <tokenizer class="solr.StandardTokenizerFactory"/>
|
|
|
|
+ <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
|
|
|
|
+ </analyzer>
|
|
|
|
+ </fieldType>
|
|
|
|
+
|
|
|
|
+ <!-- Since fields of this type are by default neither stored nor indexed,
|
|
|
|
+ any data added to them will be ignored outright. -->
|
|
|
|
+ <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
|
|
|
|
+ </types>
|
|
|
|
+
|
|
|
|
+ <fields>
|
|
|
|
+ <!-- Valid attributes for fields:
|
|
|
|
+ name: mandatory - the name for the field
|
|
|
|
+ type: mandatory - the name of a previously defined type from the
|
|
|
|
+ <types> section
|
|
|
|
+ indexed: true if this field should be indexed (searchable or sortable)
|
|
|
|
+ stored: true if this field should be retrievable
|
|
|
|
+ multiValued: true if this field may contain multiple values per document
|
|
|
|
+ omitNorms: (expert) set to true to omit the norms associated with
|
|
|
|
+ this field (this disables length normalization and index-time
|
|
|
|
+ boosting for the field, and saves some memory). Only full-text
|
|
|
|
+ fields or fields that need an index-time boost need norms.
|
|
|
|
+ termVectors: [false] set to true to store the term vector for a
|
|
|
|
+ given field.
|
|
|
|
+ When using MoreLikeThis, fields used for similarity should be
|
|
|
|
+ stored for best performance.
|
|
|
|
+ termPositions: Store position information with the term vector.
|
|
|
|
+ This will increase storage costs.
|
|
|
|
+ termOffsets: Store offset information with the term vector. This
|
|
|
|
+ will increase storage costs.
|
|
|
|
+ default: a value that should be used if no value is specified
|
|
|
|
+ when adding a document.
|
|
|
|
+ -->
|
|
|
|
+
|
|
|
|
+ <field name="id" type="string" indexed="true" stored="true" required="true" /> <!-- document identifier; declared as the uniqueKey further on in this schema -->
|
|
|
|
+ <field name="name" type="text_general_rev" indexed="true" stored="true"/> <!-- copied into text, name_split and text_ngram by the copyField rules -->
|
|
|
|
+ <field name="description" type="text_general_rev" indexed="true" stored="true"/> <!-- copied into text and text_ngram by the copyField rules -->
|
|
|
|
+ <field name="tags" type="text_general_rev" indexed="true" stored="true" multiValued="true"/> <!-- multi-valued; copied into text and text_ngram by the copyField rules -->
|
|
|
|
+
|
|
|
|
+ <!-- catchall field, containing all other searchable text fields (implemented
|
|
|
|
+ via copyField further on in this schema) -->
|
|
|
|
+ <field name="text" type="text_en" indexed="true" stored="false" multiValued="true"/>
|
|
|
|
+
|
|
|
|
+ <!-- extra name field allowing dashes to be omitted/misplaced -->
|
|
|
|
+ <field name="name_split" type="text_en_splitting_tight" indexed="true" stored="false" />
|
|
|
|
+
|
|
|
|
+ <!-- extra catchall for ngram searches -->
|
|
|
|
+ <field name="text_ngram" type="text_edgengram" indexed="true" stored="false" multiValued="true" />
|
|
|
|
+ </fields>
|
|
|
|
+
|
|
|
|
+ <!-- Field to use to determine and enforce document uniqueness.
|
|
|
|
+ Unless this field is marked with required="false", it will be a required field
|
|
|
|
+ -->
|
|
|
|
+ <uniqueKey>id</uniqueKey>
|
|
|
|
+
|
|
|
|
+ <!-- field for the QueryParser to use when an explicit fieldname is absent -->
|
|
|
|
+ <defaultSearchField>text</defaultSearchField>
|
|
|
|
+
|
|
|
|
+ <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
|
|
|
|
+ <solrQueryParser defaultOperator="OR"/>
|
|
|
|
+ <!-- copyField commands copy one field to another at the time a document
|
|
|
|
+ is added to the index. It's used either to index the same field differently,
|
|
|
|
+ or to add multiple fields to the same field for easier/faster searching. -->
|
|
|
|
+
|
|
|
|
+ <copyField source="name" dest="text"/>
|
|
|
|
+ <copyField source="description" dest="text"/>
|
|
|
|
+ <copyField source="tags" dest="text"/>
|
|
|
|
+
|
|
|
|
+ <copyField source="name" dest="name_split"/>
|
|
|
|
+
|
|
|
|
+ <copyField source="name" dest="text_ngram"/>
|
|
|
|
+ <copyField source="description" dest="text_ngram"/>
|
|
|
|
+ <copyField source="tags" dest="text_ngram"/>
|
|
|
|
+</schema>
|