日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當(dāng)前位置: 首頁 > 编程语言 > php >内容正文

php

PHP读取docx文档内容

發(fā)布時間:2023/12/31 php 29 豆豆
生活随笔 收集整理的這篇文章主要介紹了 PHP读取docx文档内容 小編覺得挺不錯的,現(xiàn)在分享給大家,幫大家做個參考.

引言

客戶需求, 需要從docx文檔讀取內(nèi)容并且做簡單格式化, 難點就在于如何讀取docx格式并且轉(zhuǎn)換為php可以識別的字符串形式, 慣例先貼代碼.

代碼

/*** Class Docx2Text** Docx => String*/ class Docx2Text {const SEPARATOR_TAB = "\t";/*** object zipArchive** @var string* @access private*/private $docx;/*** object domDocument from document.xml** @var string* @access private*/private $domDocument;/*** xml from document.xml** @var string* @access private*/private $_document;/*** xml from numbering.xml** @var string* @access private*/private $_numbering;/*** xml from footnote** @var string* @access private*/private $_footnote;/*** xml from endnote** @var string* @access private*/private $_endnote;/*** array of all the endnotes of the document** @var string* @access private*/private $endnotes;/*** array of all the footnotes of the document** @var string* @access private*/private $footnotes;/*** array of all the relations of the document** @var string* @access private*/private $relations;/*** array of characters to insert like a list** @var string* @access private*/private $numberingList;/*** the text content that will be exported** @var string* @access private*/private $textOuput;/*** boolean variable to know if a chart will be transformed to text** @var string* @access private*/private $chart2text;/*** boolean variable to know if a table will be transformed to text** @var string* @access private*/private $table2text;/*** boolean variable to know if a list will be transformed to text** @var string* @access private*/private $list2text;/*** boolean variable to know if a paragraph will be transformed to text** @var string* @access private*/private $paragraph2text;/*** boolean variable to know if footnotes will be extracteded** @var string* @access private*/private $footnote2text;/*** boolean variable to know if endnotes will be extracted** @var string* @access private*/private $endnote2text;/*** Construct** @param $boolTransforms array of boolean values of which elements should be transformed or not* @access public*/public function __construct($boolTransforms = array()){//table,list, paragraph, footnote, endnote, chartif (isset($boolTransforms['table'])) {$this->table2text = $boolTransforms['table'];} else {$this->table2text = true;}if (isset($boolTransforms['list'])) {$this->list2text = $boolTransforms['list'];} else {$this->list2text = true;}if (isset($boolTransforms['paragraph'])) {$this->paragraph2text = $boolTransforms['paragraph'];} else {$this->paragraph2text = true;}if (isset($boolTransforms['footnote'])) {$this->footnote2text = $boolTransforms['footnote'];} else {$this->footnote2text = true;}if (isset($boolTransforms['endnote'])) {$this->endnote2text = $boolTransforms['endnote'];} else {$this->endnote2text = true;}if (isset($boolTransforms['chart'])) {$this->chart2text = $boolTransforms['chart'];} else {$this->chart2text = true;}$this->textOuput = '';$this->docx = null;$this->_numbering = '';$this->numberingList = array();$this->endnotes = array();$this->footnotes = array();$this->relations = array();}/**** Extract the content of a word document and create a text file if the name is given** @access public* @param string $filename of the word document.** @return string*/public function extract($filename = ''){if (empty($this->_document)) {//xml content from document.xml is not gotexit('There is no content');}$this->domDocument = new DomDocument();$this->domDocument->loadXML($this->_document);//get the body node to check the content from all his children$bodyNode = $this->domDocument->getElementsByTagNameNS('http://schemas.openxmlformats.org/wordprocessingml/2006/main', 'body');//We get the body node. it is known that there is only one body tag$bodyNode = $bodyNode->item(0);foreach ($bodyNode->childNodes as $child) {//the children can be a table, a paragraph or a section. We only implement the 2 first option said.if ($this->table2text && $child->tagName == 'w:tbl') {//this node is a table and the content is split with tabs if the variable table2text from the class is true$this->textOuput .= $this->table($child) . $this->separator();} else {//this node is a paragraph$this->textOuput .= $this->printWP($child) . ($this->paragraph2text ? $this->separator() : '');}}if (!empty($filename)) {$this->writeFile($filename, $this->textOuput);} else {return $this->textOuput;}}/*** Setter** @access public* @param $filename*/public function setDocx($filename){$this->docx = new ZipArchive();$ret = $this->docx->open($filename);if ($ret === true) {$this->_document = $this->docx->getFromName('word/document.xml');} else {exit('failed');}}/*** extract the content to an array from endnote.xml** @access private*/private function loadEndNote(){if (empty($this->endnotes)) {if (empty($this->_endnote)) {$this->_endnote = $this->docx->getFromName('word/endnotes.xml');}if (!empty($this->_endnote)) {$domDocument = new DomDocument();$domDocument->loadXML($this->_endnote);$endnotes = $domDocument->getElementsByTagNameNS('http://schemas.openxmlformats.org/wordprocessingml/2006/main', 'endnote');foreach ($endnotes as $endnote) {$xml = $endnote->ownerDocument->saveXML($endnote);$this->endnotes[$endnote->getAttribute('w:id')] = trim(strip_tags($xml));}}}}/*** Extract the content to an array from footnote.xml** @access private*/private function loadFootNote(){if (empty($this->footnotes)) {if (empty($this->_footnote)) {$this->_footnote = $this->docx->getFromName('word/footnotes.xml');}if (!empty($this->_footnote)) {$domDocument = new DomDocument();$domDocument->loadXML($this->_footnote);$footnotes = $domDocument->getElementsByTagNameNS('http://schemas.openxmlformats.org/wordprocessingml/2006/main', 'footnote');foreach ($footnotes as $footnote) {$xml = $footnote->ownerDocument->saveXML($footnote);$this->footnotes[$footnote->getAttribute('w:id')] = trim(strip_tags($xml));}}}}/*** Extract the styles of the list to an array** @access private*/private function listNumbering(){$ids = array();$nums = array();//get the xml code from the zip archive$this->_numbering = $this->docx->getFromName('word/numbering.xml');if (!empty($this->_numbering)) {//we use the domdocument to iterate the children of the numbering tag$domDocument = new DomDocument();$domDocument->loadXML($this->_numbering);$numberings = $domDocument->getElementsByTagNameNS('http://schemas.openxmlformats.org/wordprocessingml/2006/main', 'numbering');//there is only one numbering tag in the numbering.xml$numberings = $numberings->item(0);foreach ($numberings->childNodes as $child) {$flag = true;//boolean variable to know if the node is the first style of the listforeach ($child->childNodes as $son) {if ($child->tagName == 'w:abstractNum' && $son->tagName == 'w:lvl') {foreach ($son->childNodes as $daughter) {if ($daughter->tagName == 'w:numFmt' && $flag) {$nums[$child->getAttribute('w:abstractNumId')] = $daughter->getAttribute('w:val');//set the key with internal index for the listand the value it is the type of bullet$flag = false;}}} elseif ($child->tagName == 'w:num' && $son->tagName == 'w:abstractNumId') {$ids[$son->getAttribute('w:val')] = $child->getAttribute('w:numId');//$ids is the index of the list}}}//once we know what kind of list there is in the documents, is prepared the bullet that the library will useforeach ($ids as $ind => $id) {if ($nums[$ind] == 'decimal') {//if the type is decimal it means that the bullet will be numbers$this->numberingList[$id][0] = range(1, 10);$this->numberingList[$id][1] = range(1, 10);$this->numberingList[$id][2] = range(1, 10);$this->numberingList[$id][3] = range(1, 10);} else {//otherwise is *, and other characters$this->numberingList[$id][0] = array('*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*');$this->numberingList[$id][1] = array(chr(175), chr(175), chr(175), chr(175), chr(175), chr(175), chr(175), chr(175), chr(175), chr(175), chr(175), chr(175));$this->numberingList[$id][2] = array(chr(237), chr(237), chr(237), chr(237), chr(237), chr(237), chr(237), chr(237), chr(237), chr(237), chr(237), chr(237));$this->numberingList[$id][3] = array(chr(248), chr(248), chr(248), chr(248), chr(248), chr(248), chr(248), chr(248), chr(248), chr(248), chr(248));}}}}/*** Extract the content of a w:p tag** @access private* @param $node object* @return string*/private function printWP($node){$ilvl = $numId = -1;if ($this->list2text) {//transform the list in ooxml to formatted list with tabs and bulletsif (empty($this->numberingList)) {//check if numbering.xml is extracted from the zip archive$this->listNumbering();}//use the xpath to get expecific children from a node$xpath = new DOMXPath($this->domDocument);$query = 'w:pPr/w:numPr';$xmlLists = $xpath->query($query, $node);$xmlLists = $xmlLists->item(0);//if ($xmlLists->tagName == 'w:numPr') {// if ($xmlLists->hasChildNodes()) {// foreach ($xmlLists->childNodes as $child) {// if ($child->tagName == 'w:ilvl') {// $ilvl = $child->getAttribute('w:val');// }elseif ($child->tagName == 'w:numId') {// $numId = $child->getAttribute('w:val');// }// }// }//}//if (($ilvl != -1) && ($numId != -1)) {// //if is founded the style index of the list in the document and the kind of list// $ret = '';// for($i=-1; $i < $ilvl; $i++) {// if(self::DEBUG) {// $ret .= self::SEPARATOR_TAB_DEBUG;// }// else {// $ret .= self::SEPARATOR_TAB;// }// }// $ret .= array_shift($this->numberingList[$numId][$ilvl]) . ' ' . $this->toText($node); //print the bullet//} else {$ret = $this->toText($node);//}} else {//if dont want to formatted lists, we strip from html tags$ret = $this->toText($node);}//get the data from the chartsif ($this->chart2text) {$query = 'w:r/w:drawing/wp:inline';$xmlChart = $xpath->query($query, $node);//get the relation id from the document, to get the name of the xml chart file from the relations to extract the xml code.foreach ($xmlChart as $chart) {foreach ($chart->childNodes as $child) {foreach ($child->childNodes as $child2) {foreach ($child2->childNodes as $child3) {$rid = $child3->getAttribute('r:id');}}}}//if (!empty($rid)) {// if (empty($this->relations)) {// $this->loadRelations();// }// //get the name of the chart xml file from the relations docuemnt// $dataChart = new getDataFromXmlChart($this->docx->getFromName('word/' . $this->relations[$rid]['file']));// if (in_array($this->chart2text, array(2, 'table'))) {// $ret .= $this->printChartDataTable($dataChart);//formatted print of the chart data// } else {// $ret .= $this->printChartDataArray($dataChart);//formatted print of the chart data// }//}}//extract the expecific endnote to insert with the text contentif ($this->endnote2text) {if (empty($this->endnotes)) {$this->loadEndNote();}$query = 'w:r/w:endnoteReference';$xmlEndNote = $xpath->query($query, $node);foreach ($xmlEndNote as $note) {$ret .= '[' . $this->endnotes[$note->getAttribute('w:id')] . '] ';}}//extract the expecific footnote to insert with the text contentif ($this->footnote2text) {if (empty($this->footnotes)) {$this->loadFootNote();}$query = 'w:r/w:footnoteReference';$xmlFootNote = $xpath->query($query, $node);foreach ($xmlFootNote as $note) {$ret .= '[' . $this->footnotes[$note->getAttribute('w:id')] . '] ';}}if ((($ilvl != -1) && ($numId != -1)) || (1)) {$ret .= $this->separator();}return $ret;}/*** return a text end of line** @access private*/private function separator(){return "\r\n";}/**** Extract the content of a table node from the document.xml and return a text content** @access private* @param $node object** @return string*/private function table($node){$output = '';if ($node->hasChildNodes()) {foreach ($node->childNodes as $child) {//start a new line of the tableif ($child->tagName == 'w:tr') {foreach ($child->childNodes as $cell) {//start a new cellif ($cell->tagName == 'w:tc') {if ($cell->hasChildNodes()) {//foreach ($cell->childNodes as $p) {$output .= $this->printWP($p);}$output .= self::SEPARATOR_TAB;}}}}$output .= $this->separator();}}return $output;}/**** Extract the content of a node from the document.xml and return only the text content and. stripping the html tags** @access private* @param $node object** @return string*/private function toText($node){$xml = $node->ownerDocument->saveXML($node);return trim(strip_tags($xml));} }// 實例化 $text = new Docx2Text(); // 加載docx文件 $text->setDocx('./1.docx'); // 將內(nèi)容存入$docx變量中 $docx = $text->extract(); // 調(diào)試輸出 var_dump($docx);

小結(jié)

代碼中處理docx的類來自這里
其實docx就是xml的一種擴(kuò)展類型的文檔.

總結(jié)

以上是生活随笔為你收集整理的PHP读取docx文档内容的全部內(nèi)容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網(wǎng)站內(nèi)容還不錯,歡迎將生活随笔推薦給好友。

主站蜘蛛池模板: 先锋资源久久 | 榴莲视频黄色 | 亚洲av无码精品色午夜果冻不卡 | 国产精品毛片一区 | 日韩不卡一二三区 | 日韩视频欧美视频 | 欧美黄色免费看 | 欧美一区二区三区四区视频 | 亚洲国产日韩欧美 | 蜜桃做爰免费网站 | 久久第一页 | 性欧美hd调教 | 五月婷婷,六月丁香 | 蜜桃av免费在线观看 | 免费成人深夜夜行p站 | 欧美色涩在线第一页 | 在线免费看mv的网站入口 | 一级高清毛片 | 豆花免费跳转入口官网 | 中国一级特黄真人毛片免费观看 | 国产成人亚洲精品自产在线 | japan高清日本乱xxxxx | 精品国产69 | 伊人精品视频在线观看 | 国产一区二区高清 | 亚洲精品国产电影 | 国产激情视频网站 | 成人精品视频在线播放 | 日日干日日爽 | 青草视频在线观看视频 | 蜜桃av成人永久免费 | www亚洲视频 | 成人看片泡妞 | xxxxhdvideos| 高清视频免费在线观看 | 国产麻豆电影在线观看 | 东京久久久 | 色汉综合| 日本爱爱免费视频 | 亚洲另类av | 中文欧美日韩 | 国产精品玖玖玖 | 性久久久久久久久 | 久久久中文 | 国产白丝袜美女久久久久 | 日韩欧美色图 | 少妇一级淫免费播放 | 一本大道综合伊人精品热热 | 欧美色图视频在线 | 国产黄a三级三级看三级 | 午夜网址| 亚洲成人一 | 爱情岛亚洲品质自拍极速福利网站 | 毛片免费全部无码播放 | 日韩123区 | 国产一区视频免费观看 | 久久久夜色精品亚洲 | 午夜尤物 | 国产精品推荐 | 久久久久久久久久福利 | 九色视频自拍 | 在线免费观看黄色av | 啊灬啊灬啊灬秀婷 | av制服丝袜 | 日韩性生活大片 | 国产尤物在线观看 | av图片在线观看 | 777中文字幕 | 日韩免费黄色片 | 一级片av | 91大片免费看 | 自拍1区 | 午夜你懂的 | 国产精品一区二区精品 | 蜜臀av在线观看 | 久久精品国产一区二区电影 | 先锋资源一区二区 | 久久第一页 | 女人和拘做爰正片视频 | 中文字幕日韩一区 | 一区二区三区精品视频在线观看 | 国产剧情一区 | 少妇被粗大猛进进出出s小说 | 国产精品自产拍在线观看 | 欧美视频xxx | 亚洲精品一区二区三 | 黄色资源网站 | 欧美一级性生活视频 | 色婷婷视频 | 农村妇女av| 久久久成人精品一区二区三区 | 亚洲成人观看 | 欧美又黑又粗 | 涩涩涩在线视频 | 亚洲性欧美色 | 美女一区二区三区 | 国产精品久久久久久av | 亚州男人天堂 | 老女人人体欣赏a√s |