diff -ruN modified/phpdig-1.8.6/admin/robot_functions.php original/phpdig-1.8.6/admin/robot_functions.php --- modified/phpdig-1.8.6/admin/robot_functions.php 2004-12-25 19:30:00.000000000 +0100 +++ original/phpdig-1.8.6/admin/robot_functions.php 2004-12-17 17:26:58.000000000 +0100 @@ -516,17 +516,6 @@ else if (($regs[2] == 'vnd.ms-powerpoint' || $regs[2] == 'mspowerpoint') && PHPDIG_INDEX_MSPOWERPOINT == true) { $status = "MSPOWERPOINT"; } - else if (($regs[2] == 'vnd.sun.xml.writer' - || $regs[2] == 'vnd.sun.xml.writer.template' - || $regs[2] == 'vnd.sun.xml.calc' - || $regs[2] == 'vnd.sun.xml.calc.template' - || $regs[2] == 'vnd.sun.xml.draw' - || $regs[2] == 'vnd.sun.xml.draw.template' - || $regs[2] == 'vnd.sun.xml.impress' - || $regs[2] == 'vnd.sun.xml.impress.template' - || $regs[2] == 'vnd.sun.xml.math') && PHPDIG_INDEX_OPENOFFICE == true) { - $status = "OPENOFFICE"; - } else { $status = "NOFILE"; $stop = true; @@ -538,7 +527,6 @@ } } - elseif (eregi('Last-Modified: *([a-z0-9,: ]+)',$answer,$regs)) { //search last-modified header $lm_date = $regs[1]; @@ -997,14 +985,12 @@ $is_exec_command_msexcel = is_executable(PHPDIG_PARSE_MSEXCEL); $is_exec_command_pdf = is_executable(PHPDIG_PARSE_PDF); $is_exec_command_mspowerpoint = is_executable(PHPDIG_PARSE_MSPOWERPOINT); - $is_exec_command_openoffice = is_executable(PHPDIG_PARSE_OPENOFFICE); } else { $is_exec_command_msword = 1; $is_exec_command_msexcel = 1; $is_exec_command_pdf = 1; $is_exec_command_mspowerpoint = 1; - $is_exec_command_openoffice = 1; } if (is_array($result_test) @@ -1014,11 +1000,10 @@ || $result_test['status'] == 'MSEXCEL' && PHPDIG_INDEX_MSEXCEL == true && file_exists(PHPDIG_PARSE_MSEXCEL) && $is_exec_command_msexcel || $result_test['status'] == 'PDF' && PHPDIG_INDEX_PDF == true && file_exists(PHPDIG_PARSE_PDF) && $is_exec_command_pdf || $result_test['status'] == 'MSPOWERPOINT' && PHPDIG_INDEX_MSPOWERPOINT == true && file_exists(PHPDIG_PARSE_MSPOWERPOINT) && $is_exec_command_mspowerpoint - || $result_test['status'] == 'OPENOFFICE' && PHPDIG_INDEX_OPENOFFICE == true && file_exists(PHPDIG_PARSE_OPENOFFICE) && $is_exec_command_openoffice ) { // $file_content = @file($uri); - if (in_array($result_test['status'],array('MSWORD','MSEXCEL','PDF','MSPOWERPOINT', 'OPENOFFICE'))) { + if (in_array($result_test['status'],array('MSWORD','MSEXCEL','PDF','MSPOWERPOINT'))) { $file_content = array(); $fp = fopen($uri,"rb"); while (!feof($fp)) { @@ -1084,11 +1069,6 @@ $usetool = true; $command = PHPDIG_PARSE_MSPOWERPOINT.' '.PHPDIG_OPTION_MSPOWERPOINT.' '.$tempfile2; break; - - case 'OPENOFFICE': - $usetool = true; - $command = PHPDIG_PARSE_OPENOFFICE.' '.PHPDIG_OPTION_OPENOFFICE.' '.$tempfile2; - break; } if ($usetool) { rename($tempfile1,$tempfile2); @@ -1143,15 +1123,6 @@ if(strlen(PHPDIG_MSPOWERPOINT_EXTENSION) > 0) { $my_new_tempfile = $tempfile2.PHPDIG_MSPOWERPOINT_EXTENSION; } - else { - $my_new_tempfile = $tempfile1; - } - break; - - case 'OPENOFFICE': - if(strlen(PHPDIG_OPENOFFICE_EXTENSION) > 0) { - $my_new_tempfile = $tempfile2.PHPDIG_OPENOFFICE_EXTENSION; - } else { $my_new_tempfile = $tempfile1; } diff -ruN modified/phpdig-1.8.6/documentation/phpdig-doc-en.html original/phpdig-1.8.6/documentation/phpdig-doc-en.html --- modified/phpdig-1.8.6/documentation/phpdig-doc-en.html 2004-12-26 09:58:07.952581712 +0100 +++ original/phpdig-1.8.6/documentation/phpdig-doc-en.html 2003-11-08 17:17:40.000000000 +0100 @@ -75,11 +75,11 @@
3.3. File types wich can be indexed
PhpDig indexes HTML and text files by itself.
-PhpDig could index PDF, MS-Word, MS-Excel and OpenOffice files if you install
+PhpDig could index PDF, MS-Word and MS-Excel files if you install
external binaries on the spidering machines to this purpose.
PhpDig is configured using catdoc, xls2csv
-, pstotext and soffice2txt programs.
+and pstotext programs.
You could find catdoc and xls2csv at this url : @@ -92,10 +92,6 @@ http://research.compaq.com/SRC/virtualpaper/pstotext.html.
-You could find soffice2txt at this url: -http://bonnet.jeanphilippe.free.fr/softwares -
-The author does not offer support on those tools. Contact the authors of those if you have trouble in compiling and/or installing them.
@@ -355,10 +351,6 @@ define('PHPDIG_INDEX_MSEXCEL',true); define('PHPDIG_PARSE_MSEXCEL','/usr/local/bin/xls2csv'); define('PHPDIG_OPTION_MSEXCEL',''); - -define('PHPDIG_INDEX_OPENOFFICE',true); -define('PHPDIG_PARSE_OPENOFFICE','/usr/local/bin/soffice2txt.pl'); -define('PHPDIG_OPTION_OPENOFFICE','');5.5. Configuring templates
Change following variables and constants.
diff -ruN modified/phpdig-1.8.6/documentation/phpdig-doc-en.txt original/phpdig-1.8.6/documentation/phpdig-doc-en.txt --- modified/phpdig-1.8.6/documentation/phpdig-doc-en.txt 2004-12-26 10:00:58.302684560 +0100 +++ original/phpdig-1.8.6/documentation/phpdig-doc-en.txt 2003-11-08 17:17:18.000000000 +0100 @@ -79,19 +79,18 @@ PhpDig indexes HTML and text files by itself. -PhpDig could index PDF, MS-Word, MS-Excel and OpenOffice files if you install +PhpDig could index PDF, MS-Word and MS-Excel files if you install external binaries on the spidering machines to this purpose. PhpDig is configured using catdoc, xls2csv -, pstotext and soffice2txt programs. +and pstotext programs. + You could find catdoc and xls2csv at this url : http://www.45.free.net/~vitus/ice/catdoc/. Choose the 0.91.5 version. The "stable" version have trouble with some encodings and does not include xls2csv program. -You could find soffice2txt at this url: -http://bonnet.jeanphilippe.free.fr/softwares You could find pstotext at this url : http://research.compaq.com/SRC/virtualpaper/pstotext.html. @@ -381,10 +380,6 @@ define('PHPDIG_PARSE_MSEXCEL','/usr/local/bin/xls2csv'); define('PHPDIG_OPTION_MSEXCEL',''); -define('PHPDIG_INDEX_OPENOFFICE',true); -define('PHPDIG_PARSE_OPENOFFICE','/usr/local/bin/soffice2txt.pl'); -define('PHPDIG_OPTION_OPENOFFICE',''); - 5.5. Configuring templates ------------------------- diff -ruN modified/phpdig-1.8.6/documentation/phpdig-doc-fr.html original/phpdig-1.8.6/documentation/phpdig-doc-fr.html --- modified/phpdig-1.8.6/documentation/phpdig-doc-fr.html 2004-12-26 09:59:37.648945792 +0100 +++ original/phpdig-1.8.6/documentation/phpdig-doc-fr.html 2003-11-08 17:19:20.000000000 +0100 @@ -76,12 +76,12 @@3.3. Types de fichiers pouvant être indexés
PhpDig indexe nativement les fichiers HTML et texte.
-De plus, PhpDig est capable d'indexer les fichiers PDF, MS-Word,
-MS-Excel et OpenOffice si vous installez sur la machine qui effectue l'indexation
+De plus, PhpDig est capable d'indexer les fichiers PDF, MS-Word et
+MS-Excel si vous installez sur la machine qui effectue l'indexation
des exécutables externes capables d'extraire le contenu textuel
de ces documents.
PhpDig est configuré pour utiliser les utilitaires catdoc,
-xls2csv, pstotext et soffice2txt.
+xls2csv et pstotext.
Vous trouverez catdoc et xls2csv à cette adresse : @@ -90,10 +90,6 @@ le contenu des fichiers et ne contient pas le programme xls2csv.
-Vous trouverez soffice2txt à cette adresse : -http://bonnet.jeanphilippe.free.fr/softwares -
-Vous trouverez pstotext à cette adresse : http://research.compaq.com/SRC/virtualpaper/pstotext.html.
@@ -375,11 +371,6 @@ define('PHPDIG_INDEX_MSEXCEL',true); define('PHPDIG_PARSE_MSEXCEL','/usr/local/bin/xls2csv'); define('PHPDIG_OPTION_MSEXCEL',''); - -define('PHPDIG_INDEX_OPENOFFICE',true); -define('PHPDIG_PARSE_OPENOFFICE','/usr/local/bin/soffice2txt.pl'); -define('PHPDIG_OPTION_OPENOFFICE',''); -5.5. Configuration du modèle pour les résultats
Modifiez les variables et constantes suivantes.
diff -ruN modified/phpdig-1.8.6/documentation/phpdig-doc-fr.txt original/phpdig-1.8.6/documentation/phpdig-doc-fr.txt --- modified/phpdig-1.8.6/documentation/phpdig-doc-fr.txt 2004-12-26 10:00:15.848138632 +0100 +++ original/phpdig-1.8.6/documentation/phpdig-doc-fr.txt 2003-11-08 17:20:24.000000000 +0100 @@ -80,12 +80,12 @@ PhpDig indexe nativement les fichiers HTML et texte. -De plus, PhpDig est capable d'indexer les fichiers PDF, MS-Word, -MS-Excel et OpenOffice si vous installez sur la machine qui effectue l'indexation +De plus, PhpDig est capable d'indexer les fichiers PDF, MS-Word et +MS-Excel si vous installez sur la machine qui effectue l'indexation des exécutables externes capables d'extraire le contenu textuel de ces documents. PhpDig est configuré pour utiliser les utilitaires catdoc, -xls2csv, pstotext et soffice2txt. +xls2csv et pstotext. Vous trouverez catdoc et xls2csv à cette adresse : @@ -97,8 +97,6 @@ Vous trouverez pstotext à cette adresse : http://research.compaq.com/SRC/virtualpaper/pstotext.html. -Vous trouverez soffice2txt à cette adresse : -http://bonnet.jeanphilippe.free.fr/softwares Aucun support n'est fourni par l'auteur sur ces outils. Veuillez vous adresser en cas de problème de compilation ou d'utilisation aux auteurs @@ -402,10 +400,6 @@ define('PHPDIG_PARSE_MSEXCEL','/usr/local/bin/xls2csv'); define('PHPDIG_OPTION_MSEXCEL',''); -define('PHPDIG_INDEX_OPENOFFICE',true); -define('PHPDIG_PARSE_OPENOFFICE','/usr/local/bin/soffice2txt.pl'); -define('PHPDIG_OPTION_OPENOFFICE',''); - 5.5. Configuration du modèle pour les résultats ------------------------- diff -ruN modified/phpdig-1.8.6/includes/config.php original/phpdig-1.8.6/includes/config.php --- modified/phpdig-1.8.6/includes/config.php 2004-12-25 19:36:01.000000000 +0100 +++ original/phpdig-1.8.6/includes/config.php 2004-12-17 17:28:20.000000000 +0100 @@ -241,10 +241,6 @@ define('PHPDIG_PARSE_MSPOWERPOINT','/usr/local/bin/ppt2text'); define('PHPDIG_OPTION_MSPOWERPOINT',''); -define('PHPDIG_INDEX_OPENOFFICE',false); -define('PHPDIG_PARSE_OPENOFFICE','/usr/local/bin/soffice2txt.pl'); -define('PHPDIG_OPTION_OPENOFFICE',''); - //---------EXTERNAL TOOLS EXTENSIONS // if external binary is not STDOUT or different extension is needed // for example, use '.txt' if external binary writes to filename.txt @@ -252,7 +248,6 @@ define('PHPDIG_PDF_EXTENSION',''); define('PHPDIG_MSEXCEL_EXTENSION',''); define('PHPDIG_MSPOWERPOINT_EXTENSION',''); -define('PHPDIG_OPENOFFICE_EXTENSION',''); //---------FTP SETTINGS define('FTP_ENABLE',0);//enable ftp content for distant PhpDig