php如何将html转成word

互联网 20-10-23

php将html转成word的方法:首先安装zip.dll压缩扩展;然后将规定好了的xml压缩成一个zip包;最后把后缀名改成doc或者docx即可。

推荐:《PHP视频教程》

php实现将HTML页面转换成word并且保存的方法

这里需要用到一个PHP工具:PHPWord。

生成Word的原理是,将一堆规定好了的xml压缩成一个zip包,并且把后缀名改成doc或者docx即可。

所以使用PHPWord,需要你的PHP环境安装zip.dll压缩扩展,我写了一个demo.

功能说明:

20150507 — HTML中的<p>标签和<ol>列表标签的获取

20150508 — 新增获取文章中的图片功能

20150509 — 新增行间距,并且过滤一下错误图片

20150514 — 新增表格处理,并且将代码改成面向对象

20150519 — 新增GD库处理网络图片

require_once 'PHPWord.php';
require_once 'SimpleHtmlDom.class.php';

/**
 * Converts an HTML page into a Word (.docx) document using PHPWord.
 *
 * Constructing the object runs the whole pipeline: the page at $url is
 * loaded with simple_html_dom, the allowed tags are walked, images are
 * downloaded next to the script, a PHPWord document is built, saved to disk
 * and finally streamed to the client as an attachment.
 */
class Word
{
    private $url;

    /**
     * Parsed content. Integer keys hold one entry per leaf element
     * (array with "tag", "text", ...); the "table" key holds the collected
     * table cells indexed by row.
     */
    private $LinetextArr = array();

    /** Scheme + host of the source page, used to absolutise image URLs. */
    public $host;

    /** Directory the downloaded images are written to. */
    public $ImgDir;

    public $CurrentDir;

    /** Errors collected from error_get_last() while processing. */
    public $error = array();

    public $filename = null;

    /** Selector passed to simple_html_dom::find(). */
    public $Allowtag = "p,ol,ul,table";

    /** Statistics **/
    public $DownImg = 0;
    public $expendTime = 0;
    public $HttpRequestTime = 0;
    public $ContentLen = 0;
    public $HttpRequestArr = array();
    public $expendmemory = 0;

    /**
     * Loads the page, parses it and immediately emits the Word document.
     *
     * @param string $url Absolute URL of the HTML page to convert.
     */
    public function __construct($url)
    {
        $startTime = $this->_Time();
        $startMemory = $this->_memory();

        $this->url = $url;
        $UrlArr = parse_url($this->url);
        $this->host = $UrlArr["scheme"]."://".$UrlArr['host'];
        $this->CurrentDir = getcwd();
        $this->LinetextArr["table"] = array();

        $html = new simple_html_dom($this->url);
        $this->HttpRequestArr[] = $this->url;
        $this->HttpRequestTime++;

        foreach ($html->find($this->Allowtag) as $value) {
            if ($value->tag == "table") {
                $this->ParseTable($value, 0, $this->LinetextArr["table"]);
            } else {
                $this->AnalysisHtmlDom($value);
            }
            // Only keep real, new errors instead of appending null (or the
            // same error again) on every iteration.
            $this->recordLastError();
        }

        $endTime = $this->_Time();
        $endMemory = $this->_memory();
        $this->expendTime = round(($endTime - $startTime), 2); // seconds
        $this->expendmemory = round(($endMemory - $startMemory) / 1000, 2); // KB (bytes / 1000)

        $this->CreateWordDom();
    }

    /**
     * Current time in seconds as a float (microtime() "msec sec" summed).
     */
    private function _Time()
    {
        return array_sum(explode(" ", microtime()));
    }

    /**
     * Memory currently allocated to the script, in bytes.
     */
    private function _memory()
    {
        return memory_get_usage();
    }

    /**
     * Appends the last PHP error to $this->error when there is one and it
     * is not the entry recorded most recently.
     */
    private function recordLastError()
    {
        $last = error_get_last();
        if ($last !== null && end($this->error) !== $last) {
            $this->error[] = $last;
        }
    }

    /**
     * Walks a table node and collects the text of its cells.
     *
     * Structural wrappers (table/tbody/thead/tfoot/tr) are descended into;
     * once a node's children are cells, each cell is stored under row $i.
     * Cells whose first child is itself a table are skipped.
     *
     * @param object $value HTML DOM node
     * @param int    $i     Row index the cells of this node are stored under
     * @param array  $Arr   Accumulator, filled in place. It must be taken by
     *                      reference, otherwise every collected cell is lost
     *                      when the call returns.
     */
    private function ParseTable($value, $i, &$Arr)
    {
        $wrappers = array("table", "tbody", "thead", "tfoot", "tr");
        $first = $value->firstChild();

        if ($first && in_array($first->tag, $wrappers, true)) {
            foreach ($value->children as $child) {
                // Post-increment on purpose: each child gets the next index.
                $this->ParseTable($child, $i++, $Arr);
            }
            return;
        }

        foreach ($value->children as $cell) {
            $cellFirst = $cell->firstChild();
            if (!$cellFirst || $cellFirst->tag != "table") {
                $Arr[$i][] = array("tag" => $cell->tag, "text" => trim($cell->plaintext));
            }
        }
    }

    /**
     * Recursively walks a node and records every leaf element.
     *
     * Links keep their href, images are downloaded and measured, every other
     * leaf is stored as plain text.
     *
     * @param object $value HTML DOM node
     */
    private function AnalysisHtmlDom($value)
    {
        if ($value->has_child()) {
            foreach ($value->children as $child) {
                $this->AnalysisHtmlDom($child);
            }
            return;
        }

        $tmp = array();
        if ($value->tag == "a") {
            $tmp = array("tag" => $value->tag, "href" => $value->href, "text" => $value->innertext);
        } else if ($value->tag == "img") {
            $src = $this->unescape($value->src);
            $UrlArr = parse_url($src);
            if (!isset($UrlArr['host'])) {
                // Relative URL: prefix the page host. A path without a leading
                // slash needs one, otherwise it fuses with the host name.
                // NOTE(review): this resolves against the site root, not the
                // page's directory.
                $path = $value->src;
                if (substr($path, 0, 1) !== "/") {
                    $path = "/".$path;
                }
                $src = $this->host.$path;
                $UrlArr = parse_url($src);
            }
            // Remote image: it has to be downloaded first.
            $src = $this->getImageFromNet($src, $UrlArr);
            if ($src) {
                $imgsArr = $this->GD($src);
                if ($imgsArr !== false) {
                    $tmp = array(
                        "tag" => $value->tag,
                        "src" => $src,
                        "text" => $value->alt,
                        "width" => $imgsArr['width'],
                        "height" => $imgsArr['height'],
                    );
                }
            }
        } else {
            $tmp = array("tag" => $value->tag, "text" => strip_tags($value->innertext));
        }

        // A failed image leaves $tmp empty; do not record a blank entry.
        if (!empty($tmp)) {
            $this->LinetextArr[] = $tmp;
        }
    }

    /**
     * Reads the image dimensions and halves them when either side exceeds
     * 800 pixels.
     *
     * @param string $src Path of the downloaded image
     * @return array|false array("width"=>..., "height"=>...) or false when
     *                     the file is not a readable image
     */
    private function GD($src)
    {
        $info = getimagesize($src);
        if ($info === false) {
            $this->recordLastError();
            return false;
        }

        $width = $info[0];
        $height = $info[1];
        if ($width > 800 || $height > 800) {
            $width = $width / 2;
            $height = $height / 2;
        }
        return array("width" => $width, "height" => $height);
    }

    /**
     * Decodes %uXXXX, &#xXXXX; and &#NNN; escapes back to UTF-8 characters.
     *
     * @param string $str Escaped string
     * @return string Decoded string
     */
    public function unescape($str)
    {
        $str = rawurldecode($str);
        preg_match_all("/(?:%u.{4})|&#x.{4};|&#\d+;|.+/U", $str, $r);
        $ar = $r[0];
        foreach ($ar as $k => $v) {
            if (substr($v, 0, 2) == "%u") {
                $ar[$k] = iconv("UCS-2BE", "UTF-8", pack("H4", substr($v, -4)));
            } elseif (substr($v, 0, 3) == "&#x") {
                $ar[$k] = iconv("UCS-2BE", "UTF-8", pack("H4", substr($v, 3, -1)));
            } elseif (substr($v, 0, 2) == "&#") {
                $ar[$k] = iconv("UCS-2BE", "UTF-8", pack("n", substr($v, 2, -1)));
            }
        }
        return join("", $ar);
    }

    /**
     * Downloads an image into <CurrentDir>/<host>/ under an md5-based name.
     *
     * @param string $Src    Image URL
     * @param array  $UrlArr parse_url() result for $Src
     * @return string|false Path of the saved file relative to CurrentDir, or
     *                      false when the URL is unusable, the type is not
     *                      supported, or the download/write fails
     */
    private function getImageFromNet($Src, $UrlArr)
    {
        if (!is_array($UrlArr) || !isset($UrlArr['path']) || !isset($UrlArr['host'])) {
            return false;
        }

        $this->ImgDir = $this->CurrentDir."/".$UrlArr['host'];

        // pathinfo() takes the part after the LAST dot, so "a.b.jpg" works,
        // and lower-casing accepts "PHOTO.JPG".
        $ext = strtolower(pathinfo($UrlArr['path'], PATHINFO_EXTENSION));
        $_supportedImageTypes = array('jpg', 'jpeg', 'gif', 'png', 'bmp', 'tif', 'tiff');
        if (!in_array($ext, $_supportedImageTypes, true)) {
            return false;
        }

        $file = file_get_contents($Src);
        $this->HttpRequestArr[] = $Src;
        $this->HttpRequestTime++;
        if ($file === false) {
            $this->recordLastError();
            return false;
        }

        $this->_mkdir(); // create the directory, or collect the error

        $imgName = md5($UrlArr['path']).".".$ext;
        if (file_put_contents($this->ImgDir."/".$imgName, $file) === false) {
            $this->recordLastError();
            return false;
        }

        $this->DownImg++;
        return $UrlArr['host']."/".$imgName;
    }

    /**
     * Creates the image directory when it does not exist yet.
     */
    private function _mkdir()
    {
        if (is_dir($this->ImgDir)) {
            return;
        }
        // The mode must be an octal integer; the string "7777" would be
        // read as decimal 7777.
        if (!mkdir($this->ImgDir, 0777)) {
            $this->recordLastError();
        }
    }

    /**
     * Builds the PHPWord document from the parsed content and sends it.
     */
    private function CreateWordDom()
    {
        $PHPWord = new PHPWord();
        $PHPWord->setDefaultFontName('宋体');
        $PHPWord->setDefaultFontSize("11");
        $styleTable = array('borderSize' => 6, 'borderColor' => '006699', 'cellMargin' => 120);

        // New portrait section
        $section = $PHPWord->createSection();
        $section->addText($this->Details(), array(), array('spacing' => 120));

        foreach ($this->LinetextArr as $key => $lineArr) {
            if (isset($lineArr['tag'])) {
                if ($lineArr['tag'] == "li") {
                    $section->addListItem($lineArr['text'], 0, "", "", array('spacing' => 120));
                } else if ($lineArr['tag'] == "img") {
                    $section->addImage($lineArr['src'], array('width' => $lineArr['width'], 'height' => $lineArr['height'], 'align' => 'center'));
                } else if ($lineArr['tag'] == "p") {
                    $section->addText($lineArr['text'], array(), array('spacing' => 120));
                }
            } else if ($key === "table" && !empty($lineArr)) {
                // Strict comparison: before PHP 8, 0 == "table" is true, so
                // integer keys would be mistaken for the table entry.
                $PHPWord->addTableStyle('myOwnTableStyle', $styleTable);
                $table = $section->addTable("myOwnTableStyle");
                foreach ($lineArr as $tr) {
                    $table->addRow();
                    foreach ($tr as $td) {
                        $table->addCell(2000)->addText($td['text']);
                    }
                }
            }
        }

        $this->downFile($PHPWord);
    }

    /**
     * Summary line (requests, images, time, memory) placed at the top of the
     * document.
     *
     * @return string
     */
    public function Details()
    {
        $msg = "一共请求:{$this->HttpRequestTime}次,共下载的图片有{$this->DownImg}张,并且下载完成大约使用时间:{$this->expendTime}秒,整个程序执行大约消耗内存是:{$this->expendmemory}KB,";
        return $msg;
    }

    /**
     * Saves the document to disk and streams it to the client as a download.
     *
     * When no filename was set, "<host>.docx" is used.
     *
     * @param PHPWord $PHPWord Document to write
     */
    public function downFile($PHPWord)
    {
        if (empty($this->filename)) {
            $UrlArr = parse_url($this->url);
            $this->filename = $UrlArr['host'].".docx";
        }

        // Save file
        $objWriter = PHPWord_IOFactory::createWriter($PHPWord, 'Word2007');
        $objWriter->save($this->filename);

        header("Pragma: public");
        header("Expires: 0");
        header("Cache-Control: must-revalidate, post-check=0, pre-check=0");
        header("Cache-Control: public");
        header("Content-Description: File Transfer");
        header('Content-type: application/msword'); // output type
        // Force the download. The filename is quoted and the stray trailing
        // ";" of the original header is dropped.
        header('Content-Disposition: attachment; filename="'.$this->filename.'"');
        @readfile($this->filename);
    }
}

上面的代码重点感觉不是word生成,而是Simplehtmldom的使用,这是一个开源的HTML解析器,之前有提到,这几天在看他的代码,

引出了两个学习方向

① 正则表达式

② 这个扩展的函数整理

看源代码的收获:

PHP的异常是可以捕获的,而且PHP的错误也是可以捕获的。

error_get_last() //用这个函数可以捕获页面中的PHP错误,不谢。

以上就是php如何将html转成word的详细内容,更多内容请关注技术你好其它相关文章!

来源链接:
免责声明:
1.资讯内容不构成投资建议,投资者应独立决策并自行承担风险
2.本文版权归属原作所有,仅代表作者本人观点,不代表本站的观点或立场
标签: word
上一篇:php获取远程图片并下载保存到本地的方法分析 下一篇:php simplexml 如何删除节点

相关资讯