This is the mail archive of the docbook-tools-discuss@sources.redhat.com mailing list for the docbook-tools project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

XHTML support for xmlto


Hi Tim:

I've just extended xmlto to convert XHTML files into XSL-FO/PDF/txt.
I've packaged an xhtml2fo stylesheet from Antennahouse.

Included with this message are the patch and the modified spec file. You
can get RPMs from my website:

http://www.olea.org/paquetes-rpm/xmlto-0.0.18-4_1olea.src.rpm
http://www.olea.org/paquetes-rpm/xmlto-0.0.18-4_1olea.i386.rpm
http://www.olea.org/paquetes-rpm/xhtml2fo-style-xsl-20050106-1.src.rpm
http://www.olea.org/paquetes-rpm/xhtml2fo-style-xsl-20050106-1.noarch.rpm

The new stylesheet is not perfect, but it offers a great feature and my tests
don't show any operational problems, so IMHO it is ready for use.

The integration was relatively easy thanks to the nice xmlto
architecture. Now it will be a bit easier to add new source formats.

Honestly, it would be beautiful to see them published in future Fedora Core
releases :-)





-- 

        A. Ismael Olea González
 
        mailto:ismael@olea.org  http://www.olea.org
        http://aduaneros.olea.org, la ONG sin futuro.
 
        El mundo debe empezar a tener miedo a un planeta OLEA

diff -Naur xmlto-0.0.18-orig/format/xhtml1/dvi xmlto-0.0.18/format/xhtml1/dvi
--- xmlto-0.0.18-orig/format/xhtml1/dvi	1970-01-01 01:00:00.000000000 +0100
+++ xmlto-0.0.18/format/xhtml1/dvi	2005-01-10 23:47:40.000000000 +0100
@@ -0,0 +1,21 @@
+# xmlto format script: XHTML 1 source, DVI output.
+#
+# Called with one mode argument in $1:
+#   stylesheet    print the URL of the XHTML-to-XSL-FO stylesheet on stdout
+#   post-process  hand over to the script of the same name in ../fo
+# Any other mode is silently ignored.
+case "$1" in
+stylesheet)
+  # Default VERBOSE to 0 so that an unset or empty value does not make the
+  # numeric test error out.
+  if [ "${VERBOSE:-0}" -ge 1 ]
+  then
+    echo >&2 "Convert to XSL-FO"
+  fi
+  echo "http://www.antennahouse.com/XSLsample/sample-xsl-xhtml2fo/xhtml2fo.xsl";
+  ;;
+post-process)
+  # Get the FO format script to do the rest
+  sh "$(dirname "$0")/../fo/$(basename "$0")" "$1"
+  ;;
+esac
diff -Naur xmlto-0.0.18-orig/format/xhtml1/fo xmlto-0.0.18/format/xhtml1/fo
--- xmlto-0.0.18-orig/format/xhtml1/fo	1970-01-01 01:00:00.000000000 +0100
+++ xmlto-0.0.18/format/xhtml1/fo	2005-01-10 23:48:44.000000000 +0100
@@ -0,0 +1,22 @@
+# xmlto format script: XHTML 1 source, XSL-FO output.
+#
+# Called with one mode argument in $1:
+#   stylesheet    print the URL of the XHTML-to-XSL-FO stylesheet on stdout
+#   post-process  copy $XSLT_PROCESSED into $OUTPUT_DIR as <name>.fo
+# Any other mode is silently ignored.
+case "$1" in
+stylesheet)
+  # Default VERBOSE to 0 so that an unset or empty value does not make the
+  # numeric test error out.
+  if [ "${VERBOSE:-0}" -ge 1 ]
+  then
+    echo >&2 "Convert to XSL-FO"
+  fi
+  echo "http://www.antennahouse.com/XSLsample/sample-xsl-xhtml2fo/xhtml2fo.xsl";
+  ;;
+post-process)
+  # <name> is the basename of $XSLT_PROCESSED with its last ".suffix" removed.
+  # The expansion is quoted so paths containing whitespace stay one argument.
+  cp "$XSLT_PROCESSED" "$OUTPUT_DIR/$(basename "${XSLT_PROCESSED%.*}").fo"
+  ;;
+esac
diff -Naur xmlto-0.0.18-orig/format/xhtml1/pdf xmlto-0.0.18/format/xhtml1/pdf
--- xmlto-0.0.18-orig/format/xhtml1/pdf	1970-01-01 01:00:00.000000000 +0100
+++ xmlto-0.0.18/format/xhtml1/pdf	2005-01-10 23:49:22.000000000 +0100
@@ -0,0 +1,21 @@
+# xmlto format script: XHTML 1 source, PDF output.
+#
+# Called with one mode argument in $1:
+#   stylesheet    print the URL of the XHTML-to-XSL-FO stylesheet on stdout
+#   post-process  hand over to the script of the same name in ../fo
+# Any other mode is silently ignored.
+case "$1" in
+stylesheet)
+  # Default VERBOSE to 0 so that an unset or empty value does not make the
+  # numeric test error out.
+  if [ "${VERBOSE:-0}" -ge 1 ]
+  then
+    echo >&2 "Convert to XSL-FO"
+  fi
+  echo "http://www.antennahouse.com/XSLsample/sample-xsl-xhtml2fo/xhtml2fo.xsl";
+  ;;
+post-process)
+  # Get the FO format script to do the rest
+  sh "$(dirname "$0")/../fo/$(basename "$0")" "$1"
+  ;;
+esac
diff -Naur xmlto-0.0.18-orig/format/xhtml1/ps xmlto-0.0.18/format/xhtml1/ps
--- xmlto-0.0.18-orig/format/xhtml1/ps	1970-01-01 01:00:00.000000000 +0100
+++ xmlto-0.0.18/format/xhtml1/ps	2005-01-10 23:49:32.000000000 +0100
@@ -0,0 +1,21 @@
+# xmlto format script: XHTML 1 source, PostScript output.
+#
+# Called with one mode argument in $1:
+#   stylesheet    print the URL of the XHTML-to-XSL-FO stylesheet on stdout
+#   post-process  hand over to the script of the same name in ../fo
+# Any other mode is silently ignored.
+case "$1" in
+stylesheet)
+  # Default VERBOSE to 0 so that an unset or empty value does not make the
+  # numeric test error out.
+  if [ "${VERBOSE:-0}" -ge 1 ]
+  then
+    echo >&2 "Convert to XSL-FO"
+  fi
+  echo "http://www.antennahouse.com/XSLsample/sample-xsl-xhtml2fo/xhtml2fo.xsl";
+  ;;
+post-process)
+  # Get the FO format script to do the rest
+  sh "$(dirname "$0")/../fo/$(basename "$0")" "$1"
+  ;;
+esac
diff -Naur xmlto-0.0.18-orig/format/xhtml1/txt xmlto-0.0.18/format/xhtml1/txt
--- xmlto-0.0.18-orig/format/xhtml1/txt	1970-01-01 01:00:00.000000000 +0100
+++ xmlto-0.0.18/format/xhtml1/txt	2005-01-10 23:50:43.000000000 +0100
@@ -0,0 +1,39 @@
+# xmlto format script: XHTML 1 source, plain-text output.
+#
+# Picks the first available text-mode browser (w3m, lynx, links) and, in
+# post-process mode, uses it to dump $XSLT_PROCESSED as text into
+# $OUTPUT_DIR/<name>.txt.  Exits 1 when none of the browsers is installed;
+# that check runs before the mode in $1 is examined.
+if [ -x /usr/bin/w3m ]
+then
+  CONVERT=/usr/bin/w3m
+  ARGS="-T text/html -dump"
+elif [ -x /usr/bin/lynx ]
+then
+  CONVERT=/usr/bin/lynx
+  ARGS="-force_html -dump -nolist -width=72"
+elif [ -x /usr/bin/links ]
+then
+  CONVERT=/usr/bin/links
+  ARGS="-dump"
+else
+  echo >&2 "No way to convert HTML to text found."
+  exit 1
+fi
+
+case "$1" in
+post-process)
+  # Default VERBOSE to 0 so that an unset or empty value does not make the
+  # numeric test error out.
+  if [ "${VERBOSE:-0}" -ge 1 ]
+  then
+    echo >&2 "Convert HTML to ASCII"
+  fi
+  # ARGS is left unquoted on purpose: it holds a list of options that must
+  # be split into words.  POSTARGS is not set in this script (presumably
+  # supplied by the caller) and is treated the same way.  The file names
+  # are quoted so that paths containing whitespace stay intact.
+  ${CONVERT} ${ARGS} ${POSTARGS} "${XSLT_PROCESSED}" > \
+   "$OUTPUT_DIR/$(basename "${XSLT_PROCESSED%.*}").txt"
+  ;;
+esac
diff -Naur xmlto-0.0.18-orig/Makefile.am xmlto-0.0.18/Makefile.am
--- xmlto-0.0.18-orig/Makefile.am	2003-10-30 23:50:36.000000000 +0100
+++ xmlto-0.0.18/Makefile.am	2005-01-10 23:40:14.000000000 +0100
@@ -19,6 +19,11 @@
 	format/fo/dvi \
 	format/fo/pdf \
 	format/fo/ps \
+	format/xhtml1/fo \
+	format/xhtml1/pdf \
+	format/xhtml1/ps \
+	format/xhtml1/dvi \
+	format/xhtml1/txt \
 	xmlto.mak
 
 EXTRA_DIST = xmlto.spec \
@@ -38,6 +43,11 @@
 	format/fo/dvi \
 	format/fo/pdf \
 	format/fo/ps \
+	format/xhtml1/fo \
+	format/xhtml1/pdf \
+	format/xhtml1/ps \
+	format/xhtml1/dvi \
+	format/xhtml1/txt \
 	doc/xmlto.xml \
 	doc/xmlif.xml \
 	xmlto.mak \
diff -Naur xmlto-0.0.18-orig/Makefile.in xmlto-0.0.18/Makefile.in
--- xmlto-0.0.18-orig/Makefile.in	2004-01-21 12:07:48.000000000 +0100
+++ xmlto-0.0.18/Makefile.in	2005-01-10 23:40:14.000000000 +0100
@@ -184,6 +184,11 @@
 	format/fo/dvi \
 	format/fo/pdf \
 	format/fo/ps \
+	format/xhtml1/fo \
+	format/xhtml1/pdf \
+	format/xhtml1/ps \
+	format/xhtml1/dvi \
+	format/xhtml1/txt \
 	xmlto.mak
 
 EXTRA_DIST = xmlto.spec \
@@ -203,6 +208,11 @@
 	format/fo/dvi \
 	format/fo/pdf \
 	format/fo/ps \
+	format/xhtml1/fo \
+	format/xhtml1/pdf \
+	format/xhtml1/ps \
+	format/xhtml1/dvi \
+	format/xhtml1/txt \
 	doc/xmlto.xml \
 	doc/xmlif.xml \
 	xmlto.mak \
diff -Naur xmlto-0.0.18-orig/xmlto.in xmlto-0.0.18/xmlto.in
--- xmlto-0.0.18-orig/xmlto.in	2004-01-02 13:03:24.000000000 +0100
+++ xmlto-0.0.18/xmlto.in	2005-01-11 02:04:03.000000000 +0100
@@ -247,15 +247,32 @@
   exit 1
 fi
 
+
+[ ! -e "$INPUT_FILE" ] && exit 1
+
 # Decide what source format this is.  Default to DocBook.
-rootel=$(head -n 2 "$INPUT_FILE" | \
-     sed -e 's/^<?[^?>]*?>//g' -e 's/^<![^>]*>//g' -e 's/^<\([^ ]*\).*$/\1/')
+#rootel=$(head -n 2 "$INPUT_FILE" | \
+#     sed -e 's/^<?[^?>]*?>//g' -e 's/^<![^>]*>//g' -e 's/^<\([^ ]*\).*$/\1/')
+
+# It would be reasonable to teach the file command to identify the
+# DTD/Schema, but this is faster to write.  The pipeline keeps the third
+# line of xmllint's output and takes its fourth space-separated field,
+# which is expected to be the root element name.  INPUT_FILE is quoted so
+# that a path containing whitespace is passed to xmllint as one argument.
+rootel=$(echo "xpath *" | xmllint --shell "$INPUT_FILE" 2> /dev/null | head -n 3 | tail -n 1 | cut -f 4 -d " ")
+
 case $(echo $rootel) in
+html)
+	SOURCE_FORMAT="xhtml1"
+	;;
 fo:root)
 	SOURCE_FORMAT="fo"
 	;;
+article|book|part|refentry|set)
+	SOURCE_FORMAT="docbook"
+	;;
 esac
-[ ! -e "$INPUT_FILE" ] && exit 1
+[ "$VERBOSE" -ge 1 ] && echo >&2 "Source format: ${SOURCE_FORMAT}"
 
 # If the destination format is an absolute pathname then it's a
 # user-defined format script.  Otherwise it's one of ours.

Attachment: xmlto.spec
Description: Text document


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]