package ever.pipeline;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Iterator;
import java.util.NoSuchElementException;

import javax.xml.transform.stream.StreamSource;

import net.sf.saxon.s9api.DocumentBuilder;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.XPathCompiler;
import net.sf.saxon.s9api.XPathExecutable;
import net.sf.saxon.s9api.XPathSelector;
import net.sf.saxon.s9api.XQueryCompiler;
import net.sf.saxon.s9api.XQueryEvaluator;
import net.sf.saxon.s9api.XQueryExecutable;
import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.xpath.XPathEvaluator;

/**
 *
 * This class handles the access of the extraction tool to
 * the xml file.
* */ public class XmlHandler { // node with the recipe private static XdmNode input; private static Processor proc = new Processor(false); private static File in; /** * Configure handler for a new file * * @param inf */ public static void setInputFile(File inf) { in = inf; DocumentBuilder newDocumentBuilder = proc.newDocumentBuilder(); try { input = newDocumentBuilder.build(new StreamSource( new FileInputStream(in))); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SaxonApiException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * Copies the text between the tags in a text file in order that it * can be processed by the sundance tool */ public static void copyPrepTextToSundanceInput() { String query = "let $steps:=/howto/steps/step \n" + "for $i in $steps\n" + "for $token at $index in tokenize($i,'\"') \n" + "return \n" + " if( $index mod 2=0) \n" + " then replace($token,' ','_')\n" + " else $token"; XQueryCompiler newXQueryCompiler = proc.newXQueryCompiler(); XQueryExecutable compile = null; try { compile = newXQueryCompiler.compile(query); XQueryEvaluator load = compile.load(); load.setContextItem(input); Serializer serial = new Serializer(); FileOutputStream outs; outs = new FileOutputStream("singlePrep.txt"); serial.setOutputStream(outs); serial.setOutputProperty(Serializer.Property.METHOD, "text"); serial.setOutputProperty(Serializer.Property.OMIT_XML_DECLARATION, "yes"); serial.setOutputProperty(Serializer.Property.ENCODING, "utf-8"); load.run(serial); // System.out.println("Copying file."); } catch (SaxonApiException e) { // System.err.println("Current file: " + in.getAbsolutePath()); e.printStackTrace(); in.renameTo(new File(in.getAbsolutePath().replace("&", " and "))); } catch (FileNotFoundException e) { // TODO Auto-generated catch block System.err.println("Current file: " + in.getAbsolutePath()); e.printStackTrace(); } catch (Exception ex) { System.err.println("Current file: " + 
in.getAbsolutePath()); } } /** * Copies the text between the tags in of the string in order that * it can be processed by the sundance tool */ public static void copyPrepTextToSundanceInput(String xmlSource) { String query = "let $steps:=/howto/steps/step \n" + "for $i in $steps\n" + "for $token at $index in tokenize($i,'\"') \n" + "return \n" + " if( $index mod 2=0) \n" + " then replace($token,' ','_')\n" + " else $token"; XQueryCompiler newXQueryCompiler = proc.newXQueryCompiler(); DocumentBuilder newDocumentBuilder = proc.newDocumentBuilder(); XdmNode doc = null; XQueryExecutable compile = null; try { try { doc = newDocumentBuilder.build(new StreamSource( new ByteArrayInputStream(xmlSource.getBytes()))); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } compile = newXQueryCompiler.compile(query); XQueryEvaluator load = compile.load(); load.setContextItem(doc); Serializer serial = new Serializer(); FileOutputStream outs; outs = new FileOutputStream("singlePrep.txt"); serial.setOutputStream(outs); serial.setOutputProperty(Serializer.Property.METHOD, "text"); serial.setOutputProperty(Serializer.Property.OMIT_XML_DECLARATION, "yes"); serial.setOutputProperty(Serializer.Property.ENCODING, "utf-8"); load.run(serial); } catch (SaxonApiException ex) { ex.printStackTrace(); } catch (FileNotFoundException fne) { fne.printStackTrace(); } // System.out.println("Copying file."); } /** * Tests if a step starts with "Meanwhile" * @param raw * @return */ public static boolean isMeanwhileStep(String raw) { String query = "declare variable $sentence as xs:string external; \n" + "let $steps := /howto/steps/step \n" + "for $i at $index in $steps \n" + "return if(starts-with($i,\"Meanwhile \") and contains($i,$sentence)) \n" + "then \"true\" \n" + "else \"\""; XQueryCompiler newXQueryCompiler = proc.newXQueryCompiler(); XQueryExecutable compile = null; try { compile = newXQueryCompiler.compile(query); XQueryEvaluator load = compile.load(); 
load.setContextItem(input); load.setExternalVariable(new QName("sentence"), new XdmAtomicValue( raw)); Serializer serial = new Serializer(); ByteArrayOutputStream outs = new ByteArrayOutputStream(); serial.setOutputStream(outs); serial.setOutputProperty(Serializer.Property.METHOD, "text"); serial.setOutputProperty(Serializer.Property.OMIT_XML_DECLARATION, "yes"); serial.setOutputProperty(Serializer.Property.ENCODING, "utf-8"); load.run(serial); boolean ret = false; try { ret = Boolean.parseBoolean(outs.toString().trim()); } catch (NumberFormatException ex) { } // System.out.println(ret); return ret; } catch (SaxonApiException e) { // TODO Auto-generated catch block e.printStackTrace(); return false; } } /** * Tests if a step is preceding a Meanwhile step * @param raw * @return */ public static int isPreMeanwhileStep(String raw) { String query = "declare variable $sentence as xs:string external; \n" + "let $steps := /howto/steps/step \n" + "for $i at $index in $steps \n" + "return if(starts-with($i/following::step[position()=1]/text(),\"Meanwhile \") and contains($i,$sentence)) \n" + "then $index \n" + "else \"\""; XQueryCompiler newXQueryCompiler = proc.newXQueryCompiler(); XQueryExecutable compile = null; try { compile = newXQueryCompiler.compile(query); XQueryEvaluator load = compile.load(); load.setContextItem(input); load.setExternalVariable(new QName("sentence"), new XdmAtomicValue( raw)); Serializer serial = new Serializer(); ByteArrayOutputStream outs = new ByteArrayOutputStream(); serial.setOutputStream(outs); serial.setOutputProperty(Serializer.Property.METHOD, "text"); serial.setOutputProperty(Serializer.Property.OMIT_XML_DECLARATION, "yes"); serial.setOutputProperty(Serializer.Property.ENCODING, "utf-8"); load.run(serial); int ret = -1; try { ret = Integer.parseInt(outs.toString().trim()); } catch (NumberFormatException ex) { } return ret; } catch (SaxonApiException e) { // TODO Auto-generated catch block e.printStackTrace(); return -1; } } /** * Tests 
if the keyword "optional" is in an ingredient * @param ing * @return */ public static boolean isIngOptional(String ing) { String query = "declare variable $ing as xs:string external; \n" + "let $prod := /howto/products/product[contains(text(),$ing)] \n" + "for $i at $index in $prod \n" + "return if(contains($i,\"(optional)\")) \n" + "then \"true\" \n" + "else \"\""; XQueryCompiler newXQueryCompiler = proc.newXQueryCompiler(); XQueryExecutable compile = null; try { compile = newXQueryCompiler.compile(query); XQueryEvaluator load = compile.load(); load.setContextItem(input); load.setExternalVariable(new QName("ing"), new XdmAtomicValue(ing)); Serializer serial = new Serializer(); ByteArrayOutputStream outs = new ByteArrayOutputStream(); serial.setOutputStream(outs); serial.setOutputProperty(Serializer.Property.METHOD, "text"); serial.setOutputProperty(Serializer.Property.OMIT_XML_DECLARATION, "yes"); serial.setOutputProperty(Serializer.Property.ENCODING, "utf-8"); load.run(serial); boolean ret = false; try { ret = Boolean.parseBoolean(outs.toString().trim()); } catch (NumberFormatException ex) { } // System.out.println(ing+" "+ret); return ret; } catch (SaxonApiException e) { // TODO Auto-generated catch block e.printStackTrace(); return false; } } /** * Returns the product/ingredient list of a process description * @param xmlSource * @return */ public static HashSet getProductList(String xmlSource) { HashSet ret = new HashSet(); String query = "/howto/products/product"; DocumentBuilder newDocumentBuilder = proc.newDocumentBuilder(); XdmNode doc = null; try { doc = newDocumentBuilder.build(new StreamSource( new ByteArrayInputStream(xmlSource.getBytes()))); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } XPathExecutable exec = null; XPathCompiler xpath = proc.newXPathCompiler(); try { exec = xpath.compile(query); XPathSelector eval = exec.load(); eval.setContextItem(doc); eval.evaluate(); Iterator it = eval.iterator(); XdmItem current 
= null; if (it.hasNext()) { current = it.next(); } while (current != null) { ret.add(current.getStringValue()); try { current = it.next(); } catch (NoSuchElementException ex) { break; } } } catch (SaxonApiException e) { // TODO Auto-generated catch block e.printStackTrace(); } return ret; } }