/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- * The contents of this file are subject to the Netscape Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/NPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. * * The Original Code is Mozilla Communicator client code, released * March 31, 1998. * * The Initial Developer of the Original Code is Netscape * Communications Corporation. Portions created by Netscape are * Copyright (C) 1998-1999 Netscape Communications Corporation. All * Rights Reserved. * * Contributor(s): Akkana Peck. */ #include // for isdigit() #include "nsXPCOM.h" #include "nsParserCIID.h" #include "nsIParser.h" #include "nsIHTMLContentSink.h" #include "nsIContentSerializer.h" #include "nsLayoutCID.h" #include "nsIHTMLToTextSink.h" #include "nsIComponentManager.h" #include "nsIServiceManager.h" #include "nsIComponentRegistrar.h" #include "nsReadableUtils.h" #include "nsCRT.h" static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID); static NS_DEFINE_CID(kParserCID, NS_PARSER_CID); int Compare(nsString& str, nsString& aFileName) { // Open the file in a Unix-centric way, // until I find out how to use nsFileSpec: char* filename = ToNewCString(aFileName); FILE* file = fopen(filename, "r"); if (!file) { fprintf(stderr, "Can't open file %s", filename); perror(" "); delete[] filename; return 2; } delete[] filename; // Inefficiently read from the file: nsString inString; int c; int index = 0; int different = 0; while ((c = getc(file)) != EOF) { inString.Append(PRUnichar(c)); // CVS isn't doing newline comparisons on these files for some reason. // So compensate for possible newline problems in the CVS file: if (c == '\n' && str[index] == '\r') ++index; if (c != str[index++]) { //printf("Comparison failed at char %d: generated was %d, file had %d\n", // index, (int)str[index-1], (int)c); different = index; break; } } if (file != stdin) fclose(file); if (!different) return 0; else { nsAutoString left; str.Left(left, different); char* cstr = ToNewUTF8String(left); printf("Comparison failed at char %d:\n-----\n%s\n-----\n", different, cstr); Recycle(cstr); return 1; } } //---------------------------------------------------------------------- // Convert html on stdin to either plaintext or (if toHTML) html //---------------------------------------------------------------------- nsresult HTML2text(nsString& inString, nsString& inType, nsString& outType, int flags, int wrapCol, nsString& compareAgainst) { nsresult rv = NS_OK; nsString outString; // Create a parser nsIParser* parser; rv = nsComponentManager::CreateInstance(kParserCID, nsnull, kIParserIID,(void**)&parser); if (NS_FAILED(rv)) { printf("Unable to create a parser : 0x%x\n", rv); return NS_ERROR_FAILURE; } // Create the appropriate output sink #ifdef USE_SERIALIZER nsCAutoString progId(NS_CONTENTSERIALIZER_CONTRACTID_PREFIX); progId.AppendWithConversion(outType); // The syntax used here doesn't work nsCOMPtr mSerializer; mSerializer = do_CreateInstance(NS_STATIC_CAST(const char *, progId)); NS_ENSURE_TRUE(mSerializer, NS_ERROR_NOT_IMPLEMENTED); mSerializer->Init(flags, wrapCol); nsCOMPtr sink (do_QueryInterface(mSerializer)); if (!sink) { printf("Couldn't get content sink!\n"); return NS_ERROR_UNEXPECTED; } #else /* USE_SERIALIZER */ nsCOMPtr sink; if (inType != NS_LITERAL_STRING("text/html") || outType != NS_LITERAL_STRING("text/plain")) { char* in = ToNewCString(inType); char* out = ToNewCString(outType); printf("Don't know how to convert from %s to %s\n", in, out); Recycle(in); Recycle(out); return NS_ERROR_FAILURE; } sink = do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID); NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE); nsCOMPtr textSink(do_QueryInterface(sink)); NS_ENSURE_TRUE(textSink, NS_ERROR_FAILURE); textSink->Initialize(&outString, flags, wrapCol); #endif /* USE_SERIALIZER */ parser->SetContentSink(sink); nsCOMPtr dtd; if (inType.Equals(NS_LITERAL_STRING("text/html"))) { static NS_DEFINE_CID(kNavDTDCID, NS_CNAVDTD_CID); rv=nsComponentManager::CreateInstance(kNavDTDCID,nsnull,NS_GET_IID(nsIDTD),getter_AddRefs(dtd)); } else { printf("Don't know how to deal with non-html input!\n"); return NS_ERROR_NOT_IMPLEMENTED; } if (NS_FAILED(rv)) { printf("Couldn't create new HTML DTD: 0x%x\n", rv); return rv; } parser->RegisterDTD(dtd); rv = parser->Parse(inString, 0, NS_LossyConvertUCS2toASCII(inType), PR_FALSE, PR_TRUE); if (NS_FAILED(rv)) { printf("Parse() failed! 0x%x\n", rv); return rv; } NS_RELEASE(parser); if (compareAgainst.Length() > 0) return Compare(outString, compareAgainst); char* charstar = ToNewUTF8String(outString); printf("Output string is:\n--------------------\n%s--------------------\n", charstar); delete[] charstar; return NS_OK; } //---------------------------------------------------------------------- int main(int argc, char** argv) { nsString inType(NS_LITERAL_STRING("text/html")); nsString outType(NS_LITERAL_STRING("text/plain")); int wrapCol = 72; int flags = 0; nsString compareAgainst; // Skip over progname arg: const char* progname = argv[0]; --argc; ++argv; // Process flags while (argc > 0 && argv[0][0] == '-') { switch (argv[0][1]) { case 'h': printf("\ Usage: %s [-i intype] [-o outtype] [-f flags] [-w wrapcol] [-c comparison_file] infile\n\ \tIn/out types are mime types (e.g. text/html)\n\ \tcomparison_file is a file against which to compare the output\n\ \n\ \tDefaults are -i text/html -o text/plain -f 0 -w 72 [stdin]\n", progname); exit(0); case 'i': if (argv[0][2] != '\0') inType.AssignWithConversion(argv[0]+2); else { inType.AssignWithConversion(argv[1]); --argc; ++argv; } break; case 'o': if (argv[0][2] != '\0') outType.AssignWithConversion(argv[0]+2); else { outType.AssignWithConversion(argv[1]); --argc; ++argv; } break; case 'w': if (isdigit(argv[0][2])) wrapCol = atoi(argv[0]+2); else { wrapCol = atoi(argv[1]); --argc; ++argv; } break; case 'f': if (isdigit(argv[0][2])) flags = atoi(argv[0]+2); else { flags = atoi(argv[1]); --argc; ++argv; } break; case 'c': if (argv[0][2] != '\0') compareAgainst.AssignWithConversion(argv[0]+2); else { compareAgainst.AssignWithConversion(argv[1]); --argc; ++argv; } break; } ++argv; --argc; } FILE* file = 0; if (argc > 0) // read from a file { // Open the file in a Unix-centric way, // until I find out how to use nsFileSpec: file = fopen(argv[0], "r"); if (!file) { fprintf(stderr, "Can't open file %s", argv[0]); perror(" "); exit(1); } } else file = stdin; nsresult ret; { nsCOMPtr servMan; NS_InitXPCOM2(getter_AddRefs(servMan), nsnull, nsnull); nsCOMPtr registrar = do_QueryInterface(servMan); NS_ASSERTION(registrar, "Null nsIComponentRegistrar"); registrar->AutoRegister(nsnull); // Read in the string: very inefficient, but who cares? nsString inString; int c; while ((c = getc(file)) != EOF) inString.Append(PRUnichar(c)); if (file != stdin) fclose(file); ret = HTML2text(inString, inType, outType, flags, wrapCol, compareAgainst); } // this scopes the nsCOMPtrs // no nsCOMPtrs are allowed to be alive when you call NS_ShutdownXPCOM nsresult rv = NS_ShutdownXPCOM( NULL ); NS_ASSERTION(NS_SUCCEEDED(rv), "NS_ShutdownXPCOM failed"); return ret; }