Sync the Java sources of the HTML5 parser with the parser repo. rs=sicking. NPOTB.

This commit is contained in:
Henri Sivonen 2010-04-09 18:48:29 +03:00
parent 73371f6e8d
commit cee4fa1bec
8 changed files with 131 additions and 19 deletions

View File

@ -508,20 +508,45 @@ public final class AttributeName
}
// [NOCPP[
/**
* Creator for use when the XML violation policy requires an attribute name
* to be changed.
*
* @param name
* the name of the attribute to create
*/
static AttributeName create(@Local String name) {
    // Only the local-name argument depends on the requested name; the other
    // arguments are the shared ALL_* constants and a literal false.
    return new AttributeName(
            ALL_NO_NS,
            SAME_LOCAL(name),
            ALL_NO_PREFIX,
            ALL_NCNAME,
            false);
}
/**
* Queries whether this name is an XML 1.0 4th ed. NCName.
*
* @param mode
* the SVG/MathML/HTML mode
* @return <code>true</code> if this is an NCName in the given mode
*/
public boolean isNcName(int mode) {
    // The flag array is indexed directly by the caller-supplied mode value.
    return this.ncname[mode];
}
/**
* Queries whether this is an <code>xmlns</code> attribute.
*
* @return <code>true</code> if this is an <code>xmlns</code> attribute
*/
public boolean isXmlns() {
    // Plain accessor for the stored flag.
    return this.xmlns;
}
/**
* Queries whether this attribute has a case-folded value in the HTML4 mode
* of the parser.
*
* @return <code>true</code> if the value is case-folded
*/
boolean isCaseFolded() {
return this == AttributeName.ACTIVE || this == AttributeName.ALIGN
|| this == AttributeName.ASYNC

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008-2009 Mozilla Foundation
* Copyright (c) 2008-2010 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -32,8 +32,14 @@ import org.xml.sax.SAXException;
public abstract class MetaScanner {
/**
* Constant for "charset".
*/
private static final @NoLength char[] CHARSET = "charset".toCharArray();
/**
* Constant for "content".
*/
private static final @NoLength char[] CONTENT = "content".toCharArray();
private static final int NO = 0;
@ -86,18 +92,39 @@ public abstract class MetaScanner {
private static final int SELF_CLOSING_START_TAG = 20;
/**
* The data source.
*/
protected ByteReadable readable;
/**
* The state of the state machine that recognizes the tag name "meta".
*/
private int metaState = NO;
/**
* The current position in recognizing the attribute name "content".
*/
private int contentIndex = -1;
/**
* The current position in recognizing the attribute name "charset".
*/
private int charsetIndex = -1;
/**
* The tokenizer state.
*/
protected int stateSave = DATA;
/**
* The currently filled length of strBuf.
*/
private int strBufLen;
/**
* Accumulation buffer for attribute values.
*/
private char[] strBuf;
// [NOCPP[
@ -119,6 +146,8 @@ public abstract class MetaScanner {
}
/**
* Reads a byte from the data source.
*
* -1 means end.
* @return
* @throws IOException
@ -131,6 +160,9 @@ public abstract class MetaScanner {
// WARNING: When editing this, make sure the bytecode length shown by javap
// stays under 8000 bytes!
/**
* Runs the meta scanning algorithm.
*/
protected final void stateLoop(int state)
throws SAXException, IOException {
int c = -1;
@ -658,6 +690,10 @@ public abstract class MetaScanner {
stateSave = state;
}
/**
* Adds a character to the accumulation buffer.
* @param c the character to add
*/
private void addToBuffer(int c) {
if (strBufLen == strBuf.length) {
char[] newBuf = new char[strBuf.length + (strBuf.length << 1)];
@ -668,6 +704,11 @@ public abstract class MetaScanner {
strBuf[strBufLen++] = (char)c;
}
/**
* Attempts to extract a charset name from the accumulation buffer.
* @return <code>true</code> if successful
* @throws SAXException
*/
private boolean tryCharset() throws SAXException {
if (metaState != A || !(contentIndex == 6 || charsetIndex == 6)) {
return false;
@ -690,6 +731,13 @@ public abstract class MetaScanner {
return success;
}
/**
* Tries to switch to an encoding.
*
* This method is abstract; what "switching" involves is defined entirely by
* the implementing subclass.
*
* @param encoding
* the encoding to try to switch to
* @return <code>true</code> if successful
* @throws SAXException
* if the implementation reports an error (the conditions are defined by
* the subclass)
*/
protected abstract boolean tryCharset(String encoding) throws SAXException;

View File

@ -168,7 +168,7 @@ public final class Portability {
}
public static char[] isIndexPrompt() {
// Returns the searchable-index prompt text as a newly created char array.
// NOTE(review): two return statements appear back to back here; given the
// hunk header (-168,7 +168,7) the first is presumably the replaced line and
// the second its replacement — confirm against the actual file.
return "This is a searchable index. Insert your search keywords here: ".toCharArray();
return "This is a searchable index. Enter search keywords: ".toCharArray();
}
public static void delete(Object o) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2008 Mozilla Foundation
* Copyright (c) 2008-2010 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -24,17 +24,42 @@ package nu.validator.htmlparser.impl;
import nu.validator.htmlparser.annotation.NoLength;
/**
* A UTF-16 buffer that knows the start and end indices of its unconsumed
* content.
*
* @version $Id$
* @author hsivonen
*/
public final class UTF16Buffer {
/**
* The backing store of the buffer. May be larger than the logical content
* of this <code>UTF16Buffer</code>.
*/
private final @NoLength char[] buffer;
/**
* The index of the first unconsumed character in the backing buffer.
*/
private int start;
/**
* The index of the slot immediately after the last character in the backing
* buffer that is part of the logical content of this
* <code>UTF16Buffer</code>.
*/
private int end;
/**
* Constructor for wrapping an existing UTF-16 code unit array.
*
* @param buffer
* the backing buffer
* @param start
* the index of the first character to consume
* @param end
* the index immediately after the last character to consume
*/
public UTF16Buffer(@NoLength char[] buffer, int start, int end) {
this.buffer = buffer;
@ -43,45 +68,58 @@ public final class UTF16Buffer {
}
/**
* Returns the start.
* Returns the start index.
*
* @return the start
* @return the start index
*/
public int getStart() {
    // Hands back the stored start index unchanged.
    return this.start;
}
/**
* Sets the start.
* Sets the start index.
*
* @param start the start to set
* @param start
* the start index
*/
public void setStart(int start) {
// Overwrites the stored start index; no check against end or the backing
// buffer is performed here.
this.start = start;
}
/**
* Returns the buffer.
* Returns the backing buffer.
*
* @return the buffer
* @return the backing buffer
*/
public @NoLength char[] getBuffer() {
    // Returns the array reference itself, not a copy.
    return this.buffer;
}
/**
* Returns the end.
* Returns the end index.
*
* @return the end
* @return the end index
*/
public int getEnd() {
    // Hands back the stored end index unchanged.
    return this.end;
}
/**
* Checks if the buffer has data left.
*
* @return <code>true</code> if there's data left
*/
public boolean hasMore() {
    // Data remains exactly when the end index lies strictly beyond start.
    return this.end > this.start;
}
/**
* Adjusts the start index to skip over the first character if it is a line
* feed and the previous character was a carriage return.
*
* @param lastWasCR
* whether the previous character was a carriage return
*/
public void adjust(boolean lastWasCR) {
if (lastWasCR && buffer[start] == '\n') {
start++;
@ -89,9 +127,10 @@ public final class UTF16Buffer {
}
/**
* Sets the end.
* Sets the end index.
*
* @param end the end to set
* @param end
* the end index
*/
public void setEnd(int end) {
this.end = end;

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008-2009 Mozilla Foundation
* Copyright (c) 2008-2010 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008-2009 Mozilla Foundation
* Copyright (c) 2008-2010 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2008 Mozilla Foundation
* Copyright (c) 2008-2010 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2008 Mozilla Foundation
* Copyright (c) 2008-2010 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),