balmet.com

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

word-count.js (7696B)


      1 /**
      2  * Word or character counting functionality. Count words or characters in a
      3  * provided text string.
      4  *
      5  * @namespace wp.utils
      6  *
      7  * @since 2.6.0
      8  * @output wp-admin/js/word-count.js
      9  */
     10 
     11 ( function() {
     12 	/**
     13 	 * Word counting utility
     14 	 *
     15 	 * @namespace wp.utils.wordcounter
     16 	 * @memberof  wp.utils
     17 	 *
     18 	 * @class
     19 	 *
     20 	 * @param {Object} settings                                   Optional. Key-value object containing overrides for
     21 	 *                                                            settings.
     22 	 * @param {RegExp} settings.HTMLRegExp                        Optional. Regular expression to find HTML elements.
     23 	 * @param {RegExp} settings.HTMLcommentRegExp                 Optional. Regular expression to find HTML comments.
     24 	 * @param {RegExp} settings.spaceRegExp                       Optional. Regular expression to find irregular space
     25 	 *                                                            characters.
     26 	 * @param {RegExp} settings.HTMLEntityRegExp                  Optional. Regular expression to find HTML entities.
     27 	 * @param {RegExp} settings.connectorRegExp                   Optional. Regular expression to find connectors that
     28 	 *                                                            split words.
     29 	 * @param {RegExp} settings.removeRegExp                      Optional. Regular expression to find remove unwanted
     30 	 *                                                            characters to reduce false-positives.
     31 	 * @param {RegExp} settings.astralRegExp                      Optional. Regular expression to find unwanted
     32 	 *                                                            characters when searching for non-words.
     33 	 * @param {RegExp} settings.wordsRegExp                       Optional. Regular expression to find words by spaces.
     34 	 * @param {RegExp} settings.characters_excluding_spacesRegExp Optional. Regular expression to find characters which
     35 	 *                                                            are non-spaces.
     36 	 * @param {RegExp} settings.characters_including_spacesRegExp Optional. Regular expression to find characters
     37 	 *                                                            including spaces.
     38 	 * @param {RegExp} settings.shortcodesRegExp                  Optional. Regular expression to find shortcodes.
     39 	 * @param {Object} settings.l10n                              Optional. Localization object containing specific
     40 	 *                                                            configuration for the current localization.
     41 	 * @param {string} settings.l10n.type                         Optional. Method of finding words to count.
     42 	 * @param {Array}  settings.l10n.shortcodes                   Optional. Array of shortcodes that should be removed
     43 	 *                                                            from the text.
     44 	 *
     45 	 * @return {void}
     46 	 */
     47 	function WordCounter( settings ) {
     48 		var key,
     49 			shortcodes;
     50 
     51 		// Apply provided settings to object settings.
     52 		if ( settings ) {
     53 			for ( key in settings ) {
     54 
     55 				// Only apply valid settings.
     56 				if ( settings.hasOwnProperty( key ) ) {
     57 					this.settings[ key ] = settings[ key ];
     58 				}
     59 			}
     60 		}
     61 
     62 		shortcodes = this.settings.l10n.shortcodes;
     63 
     64 		// If there are any localization shortcodes, add this as type in the settings.
     65 		if ( shortcodes && shortcodes.length ) {
     66 			this.settings.shortcodesRegExp = new RegExp( '\\[\\/?(?:' + shortcodes.join( '|' ) + ')[^\\]]*?\\]', 'g' );
     67 		}
     68 	}
     69 
     70 	// Default settings.
     71 	WordCounter.prototype.settings = {
     72 		HTMLRegExp: /<\/?[a-z][^>]*?>/gi,
     73 		HTMLcommentRegExp: /<!--[\s\S]*?-->/g,
     74 		spaceRegExp: /&nbsp;|&#160;/gi,
     75 		HTMLEntityRegExp: /&\S+?;/g,
     76 
     77 		// \u2014 = em-dash.
     78 		connectorRegExp: /--|\u2014/g,
     79 
     80 		// Characters to be removed from input text.
     81 		removeRegExp: new RegExp( [
     82 			'[',
     83 
     84 				// Basic Latin (extract).
     85 				'\u0021-\u0040\u005B-\u0060\u007B-\u007E',
     86 
     87 				// Latin-1 Supplement (extract).
     88 				'\u0080-\u00BF\u00D7\u00F7',
     89 
     90 				/*
     91 				 * The following range consists of:
     92 				 * General Punctuation
     93 				 * Superscripts and Subscripts
     94 				 * Currency Symbols
     95 				 * Combining Diacritical Marks for Symbols
     96 				 * Letterlike Symbols
     97 				 * Number Forms
     98 				 * Arrows
     99 				 * Mathematical Operators
    100 				 * Miscellaneous Technical
    101 				 * Control Pictures
    102 				 * Optical Character Recognition
    103 				 * Enclosed Alphanumerics
    104 				 * Box Drawing
    105 				 * Block Elements
    106 				 * Geometric Shapes
    107 				 * Miscellaneous Symbols
    108 				 * Dingbats
    109 				 * Miscellaneous Mathematical Symbols-A
    110 				 * Supplemental Arrows-A
    111 				 * Braille Patterns
    112 				 * Supplemental Arrows-B
    113 				 * Miscellaneous Mathematical Symbols-B
    114 				 * Supplemental Mathematical Operators
    115 				 * Miscellaneous Symbols and Arrows
    116 				 */
    117 				'\u2000-\u2BFF',
    118 
    119 				// Supplemental Punctuation.
    120 				'\u2E00-\u2E7F',
    121 			']'
    122 		].join( '' ), 'g' ),
    123 
    124 		// Remove UTF-16 surrogate points, see https://en.wikipedia.org/wiki/UTF-16#U.2BD800_to_U.2BDFFF
    125 		astralRegExp: /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
    126 		wordsRegExp: /\S\s+/g,
    127 		characters_excluding_spacesRegExp: /\S/g,
    128 
    129 		/*
    130 		 * Match anything that is not a formatting character, excluding:
    131 		 * \f = form feed
    132 		 * \n = new line
    133 		 * \r = carriage return
    134 		 * \t = tab
    135 		 * \v = vertical tab
    136 		 * \u00AD = soft hyphen
    137 		 * \u2028 = line separator
    138 		 * \u2029 = paragraph separator
    139 		 */
    140 		characters_including_spacesRegExp: /[^\f\n\r\t\v\u00AD\u2028\u2029]/g,
    141 		l10n: window.wordCountL10n || {}
    142 	};
    143 
    144 	/**
    145 	 * Counts the number of words (or other specified type) in the specified text.
    146 	 *
    147 	 * @since 2.6.0
    148 	 *
    149 	 * @memberof wp.utils.wordcounter
    150 	 *
    151 	 * @param {string}  text Text to count elements in.
    152 	 * @param {string}  type Optional. Specify type to use.
    153 	 *
    154 	 * @return {number} The number of items counted.
    155 	 */
    156 	WordCounter.prototype.count = function( text, type ) {
    157 		var count = 0;
    158 
    159 		// Use default type if none was provided.
    160 		type = type || this.settings.l10n.type;
    161 
    162 		// Sanitize type to one of three possibilities: 'words', 'characters_excluding_spaces' or 'characters_including_spaces'.
    163 		if ( type !== 'characters_excluding_spaces' && type !== 'characters_including_spaces' ) {
    164 			type = 'words';
    165 		}
    166 
    167 		// If we have any text at all.
    168 		if ( text ) {
    169 			text = text + '\n';
    170 
    171 			// Replace all HTML with a new-line.
    172 			text = text.replace( this.settings.HTMLRegExp, '\n' );
    173 
    174 			// Remove all HTML comments.
    175 			text = text.replace( this.settings.HTMLcommentRegExp, '' );
    176 
    177 			// If a shortcode regular expression has been provided use it to remove shortcodes.
    178 			if ( this.settings.shortcodesRegExp ) {
    179 				text = text.replace( this.settings.shortcodesRegExp, '\n' );
    180 			}
    181 
    182 			// Normalize non-breaking space to a normal space.
    183 			text = text.replace( this.settings.spaceRegExp, ' ' );
    184 
    185 			if ( type === 'words' ) {
    186 
    187 				// Remove HTML Entities.
    188 				text = text.replace( this.settings.HTMLEntityRegExp, '' );
    189 
    190 				// Convert connectors to spaces to count attached text as words.
    191 				text = text.replace( this.settings.connectorRegExp, ' ' );
    192 
    193 				// Remove unwanted characters.
    194 				text = text.replace( this.settings.removeRegExp, '' );
    195 			} else {
    196 
    197 				// Convert HTML Entities to "a".
    198 				text = text.replace( this.settings.HTMLEntityRegExp, 'a' );
    199 
    200 				// Remove surrogate points.
    201 				text = text.replace( this.settings.astralRegExp, 'a' );
    202 			}
    203 
    204 			// Match with the selected type regular expression to count the items.
    205 			text = text.match( this.settings[ type + 'RegExp' ] );
    206 
    207 			// If we have any matches, set the count to the number of items found.
    208 			if ( text ) {
    209 				count = text.length;
    210 			}
    211 		}
    212 
    213 		return count;
    214 	};
    215 
    216 	// Add the WordCounter to the WP Utils.
    217 	window.wp = window.wp || {};
    218 	window.wp.utils = window.wp.utils || {};
    219 	window.wp.utils.WordCounter = WordCounter;
    220 } )();