Class HTMLScanner.ContentScanner

    • Field Detail

      • qName_

        private final QName qName_
        A qualified name.
      • scanStartElement_

        private java.lang.String scanStartElement_
    • Constructor Detail

      • ContentScanner

        public ContentScanner()
    • Method Detail

      • scan

        public int scan​(boolean complete)
                 throws java.io.IOException
        Scan.
        Specified by:
        scan in interface HTMLScanner.Scanner
        Parameters:
        complete - True if the scanner should not return until scanning is complete.
        Returns:
        True if additional scanning is required.
        Throws:
        java.io.IOException - Thrown if an I/O error occurs.
      • eof

        private void eof()
      • scanUntilEndTag

        private void scanUntilEndTag​(java.lang.String tagName)
                              throws java.io.IOException
        Scans the content of <noscript>: it doesn't get parsed but is treated as plain text when feature HTMLScanner.PARSE_NOSCRIPT_CONTENT is set to false.
        Parameters:
        tagName - the tag for which content is scanned (one of "noscript", "noframes", "iframe")
        Throws:
        java.io.IOException - on error
      • scanCharacters

        protected void scanCharacters()
                               throws java.io.IOException
        Throws:
        java.io.IOException
      • scanCDATA

        protected int scanCDATA()
                         throws java.io.IOException
        Throws:
        java.io.IOException
      • scanComment

        protected int scanComment()
                           throws java.io.IOException
        Throws:
        java.io.IOException
      • scanCommentContent

        protected int scanCommentContent​(XMLString buffer)
                                  throws java.io.IOException
        Throws:
        java.io.IOException
      • scanCDataContent

        protected int scanCDataContent​(XMLString xmlString)
                                throws java.io.IOException
        Throws:
        java.io.IOException
      • scanPI

        protected int scanPI()
                      throws java.io.IOException
        Throws:
        java.io.IOException
      • scanStartElement

        protected int scanStartElement​(boolean[] empty)
                                throws java.io.IOException
        Scans a start element.
        Parameters:
        empty - Is used for a second return value to indicate whether the start element tag is empty (e.g. "/>").
        Returns:
        ename
        Throws:
        java.io.IOException - in case of I/O problems
      • removeSpaces

        private java.lang.String removeSpaces​(java.lang.String content)
        Removes all whitespace from the string.
      • changeEncoding

        private boolean changeEncoding​(java.lang.String charset)
        Tries to change the encoding used to read the input stream to the specified one.
        Parameters:
        charset - the charset that should be used
        Returns:
        true when the encoding has been changed
      • scanAttribute

        protected int scanAttribute​(XMLAttributesImpl attributes,
                                    boolean[] empty)
                             throws java.io.IOException
        Scans a real attribute.
        Parameters:
        attributes - The list of attributes.
        empty - Is used for a second return value to indicate whether the start element tag is empty (e.g. "/>").
        Returns:
        success
        Throws:
        java.io.IOException - in case of I/O problems
      • scanAttributeUnquotedValue

        protected int scanAttributeUnquotedValue​(HTMLScanner.CurrentEntity currentEntity,
                                                 XMLString attribValue,
                                                 XMLString plainAttribValue)
                                          throws java.io.IOException
        Throws:
        java.io.IOException
      • scanAttributeQuotedValue

        protected int scanAttributeQuotedValue​(int currentQuote,
                                               HTMLScanner.CurrentEntity currentEntity,
                                               XMLString attribValue,
                                               XMLString plainAttribValue,
                                               boolean normalizeAttributes)
                                        throws java.io.IOException
        Throws:
        java.io.IOException
      • scanEndElement

        protected void scanEndElement()
                               throws java.io.IOException
        Throws:
        java.io.IOException