shop.balmet.com

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

spreadsheetreader_csv.php (6712B)


      1 <?php
      2 /**
      3  * Class for parsing CSV files
      4  *
      5  * @author Martins Pilsetnieks
      6  */
      7 class SpreadsheetReader_CSV implements Iterator, Countable {
      8 	/**
      9 	 * @var array Options array, pre-populated with the default values.
     10 	 */
     11 	private $Options = array(
     12 		'Delimiter' => ';',
     13 		'Enclosure' => '"'
     14 	);
     15 
     16 	private $Encoding = 'UTF-8';
     17 	private $BOMLength = 0;
     18 
     19 	/**
     20 	 * @var resource File handle
     21 	 */
     22 	private $Handle = false;
     23 
     24 	private $Filepath = '';
     25 
     26 	private $Index = 0;
     27 
     28 	private $CurrentRow = null;
     29 
     30 	/**
     31 	 * @param string Path to file
     32 	 * @param array Options:
     33 	 *	Enclosure => string CSV enclosure
     34 	 *	Separator => string CSV separator
     35 	 */
     36 	public function __construct($Filepath, array $Options = null)
     37 	{
     38 		$this -> Filepath = $Filepath;
     39 
     40 		if (!is_readable($Filepath))
     41 		{
     42 		throw new Exception('SpreadsheetReader_CSV: File not readable ('.$Filepath.')');
     43 		}
     44 
     45 		// For safety's sake
     46 		@ini_set('auto_detect_line_endings', true);
     47 
     48 		$this -> Options = array_merge($this -> Options, $Options);
     49 		$this -> Handle = fopen($Filepath, 'r');
     50 
     51 		// Checking the file for byte-order mark to determine encoding
     52 		$BOM16 = bin2hex(fread($this -> Handle, 2));
     53 		if ($BOM16 == 'fffe')
     54 		{
     55 		$this -> Encoding = 'UTF-16LE';
     56 		//$this -> Encoding = 'UTF-16';
     57 		$this -> BOMLength = 2;
     58 		}
     59 		elseif ($BOM16 == 'feff')
     60 		{
     61 		$this -> Encoding = 'UTF-16BE';
     62 		//$this -> Encoding = 'UTF-16';
     63 		$this -> BOMLength = 2;
     64 		}
     65 
     66 		if (!$this -> BOMLength)
     67 		{		
     68 		fseek($this -> Handle, 0);
     69 		$BOM32 = bin2hex(fread($this -> Handle, 4));
     70 		if ($BOM32 == '0000feff')
     71 		{
     72 			//$this -> Encoding = 'UTF-32BE';
     73 			$this -> Encoding = 'UTF-32';
     74 			$this -> BOMLength = 4;
     75 		}
     76 		elseif ($BOM32 == 'fffe0000')
     77 		{
     78 			//$this -> Encoding = 'UTF-32LE';
     79 			$this -> Encoding = 'UTF-32';
     80 			$this -> BOMLength = 4;
     81 		}
     82 		}
     83 
     84 		fseek($this -> Handle, 0);
     85 		$BOM8 = bin2hex(fread($this -> Handle, 3));
     86 		if ($BOM8 == 'efbbbf')
     87 		{
     88 		$this -> Encoding = 'UTF-8';
     89 		$this -> BOMLength = 3;
     90 		}
     91 
     92 		// Seeking the place right after BOM as the start of the real content
     93 		if ($this -> BOMLength)
     94 		{
     95 		fseek($this -> Handle, $this -> BOMLength);
     96 		}
     97 
     98 		// Checking for the delimiter if it should be determined automatically
     99 		if (!$this -> Options['Delimiter'])
    100 		{
    101 		// fgetcsv needs single-byte separators
    102 		$Semicolon = ';';
    103 		$Tab = "\t";
    104 		$Comma = ',';
    105 
    106 		// Reading the first row and checking if a specific separator character
    107 		// has more columns than others (it means that most likely that is the delimiter).
    108 		$SemicolonCount = count(fgetcsv($this -> Handle, null, $Semicolon));
    109 		fseek($this -> Handle, $this -> BOMLength);
    110 		$TabCount = count(fgetcsv($this -> Handle, null, $Tab));
    111 		fseek($this -> Handle, $this -> BOMLength);
    112 		$CommaCount = count(fgetcsv($this -> Handle, null, $Comma));
    113 		fseek($this -> Handle, $this -> BOMLength);
    114 
    115 		$Delimiter = $Semicolon;
    116 		if ($TabCount > $SemicolonCount || $CommaCount > $SemicolonCount)
    117 		{
    118 			$Delimiter = $CommaCount > $TabCount ? $Comma : $Tab;
    119 		}
    120 
    121 		$this -> Options['Delimiter'] = $Delimiter;
    122 		}
    123 	}
    124 
    125 	/**
    126 	 * Returns information about sheets in the file.
    127 	 * Because CSV doesn't have any, it's just a single entry.
    128 	 *
    129 	 * @return array Sheet data
    130 	 */
    131 	public function Sheets()
    132 	{
    133 		return array(0 => basename($this -> Filepath));
    134 	}
    135 
    136 	/**
    137 	 * Changes sheet to another. Because CSV doesn't have any sheets
    138 	 *	it just rewinds the file so the behaviour is compatible with other
    139 	 *	sheet readers. (If an invalid index is given, it doesn't do anything.)
    140 	 *
    141 	 * @param bool Status
    142 	 */
    143 	public function ChangeSheet($Index)
    144 	{
    145 		if ($Index == 0)
    146 		{
    147 		$this -> rewind();
    148 		return true;
    149 		}
    150 		return false;
    151 	}
    152 
    153 	// !Iterator interface methods
    154 	/** 
    155 	 * Rewind the Iterator to the first element.
    156 	 * Similar to the reset() function for arrays in PHP
    157 	 */ 
    158 	public function rewind()
    159 	{
    160 		fseek($this -> Handle, $this -> BOMLength);
    161 		$this -> CurrentRow = null;
    162 		$this -> Index = 0;
    163 	}
    164 
    165 	/**
    166 	 * Return the current element.
    167 	 * Similar to the current() function for arrays in PHP
    168 	 *
    169 	 * @return mixed current element from the collection
    170 	 */
    171 	public function current()
    172 	{
    173 		if ($this -> Index == 0 && is_null($this -> CurrentRow))
    174 		{
    175 		$this -> next();
    176 		$this -> Index--;
    177 		}
    178 		return $this -> CurrentRow;
    179 	}
    180 
    181 	/** 
    182 	 * Move forward to next element. 
    183 	 * Similar to the next() function for arrays in PHP 
    184 	 */ 
    185 	public function next()
    186 	{
    187 		$this -> CurrentRow = array();
    188 
    189 		// Finding the place the next line starts for UTF-16 encoded files
    190 		// Line breaks could be 0x0D 0x00 0x0A 0x00 and PHP could split lines on the
    191 		//	first or the second linebreak leaving unnecessary \0 characters that mess up
    192 		//	the output.
    193 		if ($this -> Encoding == 'UTF-16LE' || $this -> Encoding == 'UTF-16BE')
    194 		{
    195 		while (!feof($this -> Handle))
    196 		{
    197 			// While bytes are insignificant whitespace, do nothing
    198 			$Char = ord(fgetc($this -> Handle));
    199 			if (!$Char || $Char == 10 || $Char == 13)
    200 			{
    201 			continue;
    202 			}
    203 			else
    204 			{
    205 			// When significant bytes are found, step back to the last place before them
    206 			if ($this -> Encoding == 'UTF-16LE')
    207 			{
    208 				fseek($this -> Handle, ftell($this -> Handle) - 1);
    209 			}
    210 			else
    211 			{
    212 				fseek($this -> Handle, ftell($this -> Handle) - 2);
    213 			}
    214 			break;
    215 			}
    216 		}
    217 		}
    218 
    219 		$this -> Index++;
    220 		$this -> CurrentRow = fgetcsv($this -> Handle, null, $this -> Options['Delimiter'], $this -> Options['Enclosure']);
    221 
    222 		if ($this -> CurrentRow)
    223 		{
    224 		// Converting multi-byte unicode strings
    225 		// and trimming enclosure symbols off of them because those aren't recognized
    226 		// in the relevan encodings.
    227 		if ($this -> Encoding != 'ASCII' && $this -> Encoding != 'UTF-8')
    228 		{
    229 			$Encoding = $this -> Encoding;
    230 			foreach ($this -> CurrentRow as $Key => $Value)
    231 			{
    232 			$this -> CurrentRow[$Key] = trim(trim(
    233 				mb_convert_encoding($Value, 'UTF-8', $this -> Encoding),
    234 				$this -> Options['Enclosure']
    235 			));
    236 			}
    237 
    238 		}
    239 		}
    240 
    241 		return $this -> CurrentRow;
    242 	}
    243 
    244 	/** 
    245 	 * Return the identifying key of the current element.
    246 	 * Similar to the key() function for arrays in PHP
    247 	 *
    248 	 * @return mixed either an integer or a string
    249 	 */ 
    250 	public function key()
    251 	{
    252 		return $this -> Index;
    253 	}
    254 
    255 	/** 
    256 	 * Check if there is a current element after calls to rewind() or next().
    257 	 * Used to check if we've iterated to the end of the collection
    258 	 *
    259 	 * @return boolean FALSE if there's nothing more to iterate over
    260 	 */ 
    261 	public function valid()
    262 	{
    263 		return ($this -> CurrentRow || !feof($this -> Handle));
    264 	}
    265 
    266 	// !Countable interface method
    267 	/**
    268 	 * Ostensibly should return the count of the contained items but this just returns the number
    269 	 * of rows read so far. It's not really correct but at least coherent.
    270 	 */
    271 	public function count()
    272 	{
    273 		return $this -> Index + 1;
    274 	}
    275 }
    276 
    277 ?>