PHP如何切割excel大文件(附完整代码)

本篇文章主要讲述的是利用phpspreadsheet切割excel大文件,具有一定的参考价值,感兴趣的朋友可以了解一下,希望对你有所启发。

利用phpspreadsheet可以轻松的解析excel文件,但是phpspreadsheet的内存消耗也是比较大的,我试过解析将近5M的纯文字excel内存使用量就会超过php默认的最大内存128M。

当然这可以用调节内存大小的方法来解决,但是在并发量大的时候就比较危险了。所以今天介绍下一种方法,利用phpspreadsheet对excel文件进行切割,这是个拿时间换空间的方法所以一般对时效性要求低的需求可以使用。

方法:先放个phpspreadsheet官网提供的一个功能readCell,我们就可以利用这个功能来进行切割。

首先对excel文件进行预读,主要是获取所有的工作表以及工作表下面的数据行数,这个阶段readCell方法一直返回的都是false,我们只需要记录readCell进来的工作表及数据行数。

然后就是对获取到的记录进行分析,确定每部分数据需要装多少行原始excel的数据,需要注意的是为了避免内容混淆,不要讲两个工作表的内容切到一起。

最后就是循环分析的数据和再次利用readCell获取每部分数据,注意每次读取文件后都要利用disconnectWorksheets方法清理phpspreadsheet的内存。

经过我自己的测试发现,利用该方法解析5M的excel文件,平均只需要21M的内存就可以搞定!

代码:

  1. <?php
  2. namespace CutExcel;
  3. require_once 'PhpSpreadsheet/autoload.php';
  4. /**
  5. * 预读过滤类
  6. * @author wangyelou
  7. * @date 2018-07-30
  8. */
  9. class MyAheadreadFilter implements \PhpOffice\PhpSpreadsheet\Reader\IReadFilter
  10. {
  11. public $record = array();
  12. private $lastRow = '';
  13. public function readCell($column, $row, $worksheetName = '')
  14. {
  15. if (isset($this->record[$worksheetName]) ) {
  16. if ($this->lastRow != $row) {
  17. $this->record[$worksheetName] ++;
  18. $this->lastRow = $row;
  19. }
  20. } else {
  21. $this->record[$worksheetName] = 1;
  22. $this->lastRow = $row;
  23. }
  24. return false;
  25. }
  26. }
  27. /**
  28. * 解析过滤类
  29. * @author wangyelou
  30. * @date 2018-07-30
  31. */
  32. class MyreadFilter implements \PhpOffice\PhpSpreadsheet\Reader\IReadFilter
  33. {
  34. public $startRow;
  35. public $endRow;
  36. public $worksheetName;
  37. public function readCell($column, $row, $worksheetName = '')
  38. {
  39. if ($worksheetName == $this->worksheetName && $row >= ($this->startRow+1) && $row <= ($this->endRow+1)) {
  40. return true;
  41. }
  42. return false;
  43. }
  44. }
  45. /**
  46. * 切割类
  47. * @author wangyelou
  48. * @date 2018-07-30
  49. */
  50. class excelCut
  51. {
  52. public $cutNum = 5;
  53. public $returnType = 'Csv';
  54. public $fileDir = '/tmp/';
  55. public $log;
  56. /**
  57. * 切割字符串
  58. * @param $str
  59. * @return array|bool
  60. */
  61. public function cutFromStr($str)
  62. {
  63. try {
  64. $filePath = '/tmp/' . time() . mt_rand(1000, 9000) . $this->returnType;
  65. file_put_contents($filePath, $str);
  66. if (file_exists($filePath)) {
  67. $result = $this->cutFromFile($filePath);
  68. unlink($filePath);
  69. return $result;
  70. } else {
  71. throw new Exception('文件写入错误');
  72. }
  73. } catch (Exception $e) {
  74. $this->log = $e->getMessage();
  75. return false;
  76. }
  77. }
  78. /**
  79. * 切割文件
  80. * @param $file
  81. * @return array|bool
  82. */
  83. public function cutFromFile($file)
  84. {
  85. try {
  86. $cutRules = $this->readaheadFromFile($file);
  87. $dir = $this->getFileDir($file);
  88. $returnType = $this->returnType ? $this->returnType : 'Csv';
  89. $results = array();
  90. //初始化读
  91. $myFilter = new MyreadFilter();
  92. $inputFileType = \PhpOffice\PhpSpreadsheet\IOFactory::identify($file);
  93. $reader = \PhpOffice\PhpSpreadsheet\IOFactory::createReader($inputFileType);
  94. $reader->setReadDataOnly(true);
  95. $reader->setReadFilter($myFilter);
  96. foreach ($cutRules as $sheetName => $rowIndexRange) {
  97. //读
  98. list($myFilter->startRow, $myFilter->endRow, $myFilter->worksheetName) = $rowIndexRange;
  99. $spreadsheetReader = $reader->load($file);
  100. $sheetData = $spreadsheetReader->setActiveSheetIndexByName($myFilter->worksheetName)->toArray(null, false, false, false);
  101. $realDatas = array_splice($sheetData, $myFilter->startRow, ($myFilter->endRow - $myFilter->startRow + 1));
  102. $spreadsheetReader->disconnectWorksheets();
  103. unset($sheetData);
  104. unset($spreadsheetReader);
  105. //写
  106. $saveFile = $dir . $sheetName . '.' . $returnType;
  107. $spreadsheetWriter = new \PhpOffice\PhpSpreadsheet\Spreadsheet();
  108. foreach ($realDatas as $rowIndex => $row) {
  109. foreach ($row as $colIndex => $col) {
  110. $spreadsheetWriter->getActiveSheet()->setCellValueByColumnAndRow($colIndex+1, $rowIndex+1, $col);
  111. }
  112. }
  113. $writer = \PhpOffice\PhpSpreadsheet\IOFactory::createWriter($spreadsheetWriter, $returnType);
  114. $writer->save($saveFile);
  115. $spreadsheetWriter->disconnectWorksheets();
  116. unset($spreadsheetWriter);
  117. $results[] = $saveFile;
  118. }
  119. return $results;
  120. } catch (Exception $e) {
  121. $this->log = $e->getMessage();
  122. return false;
  123. }
  124. }
  125. /**
  126. * 预读文件
  127. */
  128. public function readaheadFromFile($file)
  129. {
  130. if (file_exists($file)) {
  131. //获取统计数据
  132. $myFilter = new MyAheadreadFilter();
  133. $inputFileType = \PhpOffice\PhpSpreadsheet\IOFactory::identify($file);
  134. $reader = \PhpOffice\PhpSpreadsheet\IOFactory::createReader($inputFileType);
  135. $reader->setReadDataOnly(true); //只读数据
  136. $reader->setReadFilter($myFilter);
  137. $spreadsheet = $reader->load($file);
  138. //$sheetData = $spreadsheet->getActiveSheet()->toArray(null, false, false, false);
  139. list($fileName,) = explode('.', basename($file));
  140. $datas = array();
  141. $averageNum = ceil(array_sum($myFilter->record) / $this->cutNum);
  142. foreach ($myFilter->record as $sheetName => $count) {
  143. for ($i=0; $i<ceil($count/$averageNum); $i++) {
  144. $datas[$fileName . '_' . $sheetName . '_' . $i] = array($i*$averageNum, ($i+1)*$averageNum-1, $sheetName);
  145. }
  146. }
  147. return $datas;
  148. } else {
  149. throw new Exception($file . ' not exists');
  150. }
  151. }
  152. /**
  153. * 创建目录
  154. * @param $file
  155. * @return bool|string
  156. */
  157. protected function getFileDir($file)
  158. {
  159. $baseName = basename($file);
  160. list($name) = explode('.', $baseName);
  161. $fullName = $name .'_'. time() . '_' . mt_rand(1000, 9999);
  162. $path = $this->fileDir . $fullName . '/';
  163. mkdir($path, 0777);
  164. chmod($path, 0777);
  165. if (is_dir($path)) {
  166. return $path;
  167. } else {
  168. $this->log = "mkdir {$path} failed";
  169. return false;
  170. }
  171. }
  172. }