/* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Url Classifier code * * The Initial Developer of the Original Code is * Google Inc. * Portions created by the Initial Developer are Copyright (C) 2006 * the Initial Developer. All Rights Reserved. * * Contributor(s): * Tony Chang <tony@ponderer.org> * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ const Cc = Components.classes; const Ci = Components.interfaces; // js/lang.js is needed for Function.prototype.inherts /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Google Safe Browsing. * * The Initial Developer of the Original Code is Google Inc. * Portions created by the Initial Developer are Copyright (C) 2006 * the Initial Developer. All Rights Reserved. * * Contributor(s): * Aaron Boodman <aa@google.com> (original author) * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ // This file has pure js helper functions. Hence you'll find metion // of browser-specific features in here. /** * lang.js - The missing JavaScript language features * * WARNING: This class adds members to the prototypes of String, Array, and * Function for convenience. * * The tradeoff is that the for/in statement will not work properly for those * objects when this library is used. * * To work around this for Arrays, you may want to use the forEach() method, * which is more fun and easier to read. */ /** * Returns true if the specified value is |null| */ function isNull(val) { return val === null; } /** * Returns true if the specified value is an array */ function isArray(val) { return isObject(val) && val.constructor == Array; } /** * Returns true if the specified value is a string */ function isString(val) { return typeof val == "string"; } /** * Returns true if the specified value is a boolean */ function isBoolean(val) { return typeof val == "boolean"; } /** * Returns true if the specified value is a number */ function isNumber(val) { return typeof val == "number"; } /** * Returns true if the specified value is a function */ function isFunction(val) { return typeof val == "function"; } /** * Returns true if the specified value is an object */ function isObject(val) { return val && typeof val == "object"; } /** * Returns an array of all the properties defined on an object */ function getObjectProps(obj) { var ret = []; for (var p in obj) { ret.push(p); } return ret; } /** * Returns true if the specified value is an object which has no properties * defined. */ function isEmptyObject(val) { if (!isObject(val)) { return false; } for (var p in val) { return false; } return true; } var getHashCode; var removeHashCode; (function () { var hashCodeProperty = "lang_hashCode_"; /** * Adds a lang_hashCode_ field to an object. The hash code is unique for the * given object. * @param obj {Object} The object to get the hash code for * @returns {Number} The hash code for the object */ getHashCode = function(obj) { // In IE, DOM nodes do not extend Object so they do not have this method. // we need to check hasOwnProperty because the proto might have this set. if (obj.hasOwnProperty && obj.hasOwnProperty(hashCodeProperty)) { return obj[hashCodeProperty]; } if (!obj[hashCodeProperty]) { obj[hashCodeProperty] = ++getHashCode.hashCodeCounter_; } return obj[hashCodeProperty]; }; /** * Removes the lang_hashCode_ field from an object. * @param obj {Object} The object to remove the field from. */ removeHashCode = function(obj) { obj.removeAttribute(hashCodeProperty); }; getHashCode.hashCodeCounter_ = 0; })(); /** * Fast prefix-checker. */ String.prototype.startsWith = function(prefix) { if (this.length < prefix.length) { return false; } if (this.substring(0, prefix.length) == prefix) { return true; } return false; } /** * Removes whitespace from the beginning and end of the string */ String.prototype.trim = function() { return this.replace(/^\s+|\s+$/g, ""); } /** * Does simple python-style string substitution. * "foo%s hot%s".subs("bar", "dog") becomes "foobar hotdot". * For more fully-featured templating, see template.js. */ String.prototype.subs = function() { var ret = this; // this appears to be slow, but testing shows it compares more or less equiv. // to the regex.exec method. for (var i = 0; i < arguments.length; i++) { ret = ret.replace(/\%s/, String(arguments[i])); } return ret; } /** * Returns the last element on an array without removing it. */ Array.prototype.peek = function() { return this[this.length - 1]; } // TODO(anyone): add splice the first time someone needs it and then implement // push, pop, shift, unshift in terms of it where possible. // TODO(anyone): add the other neat-o functional methods like map(), etc. /** * Partially applies this function to a particular "this object" and zero or * more arguments. The result is a new function with some arguments of the first * function pre-filled and the value of |this| "pre-specified". * * Remaining arguments specified at call-time are appended to the pre- * specified ones. * * Also see: partial(). * * Note that bind and partial are optimized such that repeated calls to it do * not create more than one function object, so there is no additional cost for * something like: * * var g = bind(f, obj); * var h = partial(g, 1, 2, 3); * var k = partial(h, a, b, c); * * Usage: * var barMethBound = bind(myFunction, myObj, "arg1", "arg2"); * barMethBound("arg3", "arg4"); * * @param thisObj {object} Specifies the object which |this| should point to * when the function is run. If the value is null or undefined, it will default * to the global object. * * @returns {function} A partially-applied form of the function bind() was * invoked as a method of. */ function bind(fn, self, opt_args) { var boundargs = (typeof fn.boundArgs_ != "undefined") ? fn.boundArgs_ : []; boundargs = boundargs.concat(Array.prototype.slice.call(arguments, 2)); if (typeof fn.boundSelf_ != "undefined") { self = fn.boundSelf_; } if (typeof fn.boundFn_ != "undefined") { fn = fn.boundFn_; } var newfn = function() { // Combine the static args and the new args into one big array var args = boundargs.concat(Array.prototype.slice.call(arguments)); return fn.apply(self, args); } newfn.boundArgs_ = boundargs; newfn.boundSelf_ = self; newfn.boundFn_ = fn; return newfn; } /** * An alias to the bind() global function. * * Usage: * var g = f.bind(obj, arg1, arg2); * g(arg3, arg4); */ Function.prototype.bind = function(self, opt_args) { return bind.apply( null, [this, self].concat(Array.prototype.slice.call(arguments, 1))); } /** * Like bind(), except that a "this object" is not required. Useful when the * target function is already bound. * * Usage: * var g = partial(f, arg1, arg2); * g(arg3, arg4); */ function partial(fn, opt_args) { return bind.apply( null, [fn, null].concat(Array.prototype.slice.call(arguments, 1))); } /** * An alias to the partial() global function. * * Usage: * var g = f.partial(arg1, arg2); * g(arg3, arg4); */ Function.prototype.partial = function(opt_args) { return bind.apply( null, [this, null].concat(Array.prototype.slice.call(arguments))); } /** * Convenience. Binds all the methods of obj to itself. Calling this in the * constructor before referencing any methods makes things a little more like * Java or Python where methods are intrinsically bound to their instance. */ function bindMethods(obj) { for (var p in obj) { if (isFunction(obj[p])) { obj[p] = obj[p].bind(obj); } } } /** * Inherit the prototype methods from one constructor into another. * * Usage: * <pre> * function ParentClass(a, b) { } * ParentClass.prototype.foo = function(a) { } * * function ChildClass(a, b, c) { * ParentClass.call(this, a, b); * } * * ChildClass.inherits(ParentClass); * * var child = new ChildClass("a", "b", "see"); * child.foo(); // works * </pre> * * In addition, a superclass' implementation of a method can be invoked * as follows: * * <pre> * ChildClass.prototype.foo = function(a) { * ChildClass.superClass_.foo.call(this, a); * // other code * }; * </pre> */ Function.prototype.inherits = function(parentCtor) { var tempCtor = function(){}; tempCtor.prototype = parentCtor.prototype; this.superClass_ = parentCtor.prototype; this.prototype = new tempCtor(); } /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Google Safe Browsing. * * The Initial Developer of the Original Code is Google Inc. * Portions created by the Initial Developer are Copyright (C) 2006 * the Initial Developer. All Rights Reserved. * * Contributor(s): * Fritz Schneider <fritz@google.com> (original author) * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ // This is the code used to interact with data encoded in the // goog-black-enchash format. The format is basically a map from // hashed hostnames to encrypted sequences of regular expressions // where the encryption key is derived from the hashed // hostname. Encoding lists like this raises the bar slightly on // deriving complete table data from the db. This data format is NOT // our idea; we would've raise the bar higher :) // // Anyway, this code is a port of the original C++ implementation by // Garret. To ease verification, I mirrored that code as closely as // possible. As a result, you'll see some C++-style variable naming // and roundabout (C++) ways of doing things. Additionally, I've // omitted the comments. // // This code should not change, except to fix bugs. // // TODO: verify that using encodeURI() in getCanonicalHost is OK // TODO: accommodate other kinds of perl-but-not-javascript qualifiers /** * This thing knows how to generate lookup keys and decrypt values found in * a table of type enchash. */ function PROT_EnchashDecrypter() { this.debugZone = "enchashdecrypter"; this.REs_ = PROT_EnchashDecrypter.REs; this.hasher_ = new G_CryptoHasher(); this.base64_ = new G_Base64(); this.streamCipher_ = Cc["@mozilla.org/security/streamcipher;1"] .createInstance(Ci.nsIStreamCipher); } PROT_EnchashDecrypter.DATABASE_SALT = "oU3q.72p"; PROT_EnchashDecrypter.SALT_LENGTH = PROT_EnchashDecrypter.DATABASE_SALT.length; PROT_EnchashDecrypter.MAX_DOTS = 5; PROT_EnchashDecrypter.REs = {}; PROT_EnchashDecrypter.REs.FIND_DODGY_CHARS = new RegExp("[\x01-\x1f\x7f-\xff]+"); PROT_EnchashDecrypter.REs.FIND_DODGY_CHARS_GLOBAL = new RegExp("[\x01-\x1f\x7f-\xff]+", "g"); PROT_EnchashDecrypter.REs.FIND_END_DOTS = new RegExp("^\\.+|\\.+$"); PROT_EnchashDecrypter.REs.FIND_END_DOTS_GLOBAL = new RegExp("^\\.+|\\.+$", "g"); PROT_EnchashDecrypter.REs.FIND_MULTIPLE_DOTS = new RegExp("\\.{2,}"); PROT_EnchashDecrypter.REs.FIND_MULTIPLE_DOTS_GLOBAL = new RegExp("\\.{2,}", "g"); PROT_EnchashDecrypter.REs.FIND_TRAILING_DOTS = new RegExp("\\.+$"); PROT_EnchashDecrypter.REs.POSSIBLE_IP = new RegExp("^((?:0x[0-9a-f]+|[0-9\\.])+)$", "i"); PROT_EnchashDecrypter.REs.FIND_BAD_OCTAL = new RegExp("(^|\\.)0\\d*[89]"); PROT_EnchashDecrypter.REs.IS_OCTAL = new RegExp("^0[0-7]*$"); PROT_EnchashDecrypter.REs.IS_DECIMAL = new RegExp("^[0-9]+$"); PROT_EnchashDecrypter.REs.IS_HEX = new RegExp("^0[xX]([0-9a-fA-F]+)$"); // Regexps are given in perl regexp format. Unfortunately, JavaScript's // library isn't completely compatible. For example, you can't specify // case-insensitive matching by using (?i) in the expression text :( // So we manually set this bit with the help of this regular expression. PROT_EnchashDecrypter.REs.CASE_INSENSITIVE = /\(\?i\)/g; /** * Helper function * * @param str String to get chars from * * @param n Number of characters to get * * @returns String made up of the last n characters of str */ PROT_EnchashDecrypter.prototype.lastNChars_ = function(str, n) { n = -n; return str.substr(n); } /** * We have to have our own hex-decoder because decodeURIComponent * expects UTF-8 (so it will barf on invalid UTF-8 sequences). * * @param str String to decode * * @returns The decoded string */ PROT_EnchashDecrypter.prototype.hexDecode_ = function(str) { var output = []; var i = 0; while (i < str.length) { var c = str.charAt(i); if (c == "%" && i + 2 < str.length) { var asciiVal = Number("0x" + str.charAt(i + 1) + str.charAt(i + 2)); if (!isNaN(asciiVal)) { i += 2; c = String.fromCharCode(asciiVal); } } output[output.length] = c; ++i; } return output.join(""); } /** * Translate a plaintext enchash value into regular expressions * * @param data String containing a decrypted enchash db entry * * @returns An array of RegExps */ PROT_EnchashDecrypter.prototype.parseRegExps = function(data) { var res = data.split("\t"); G_Debug(this, "Got " + res.length + " regular rexpressions"); for (var i = 0; i < res.length; i++) { // Could have leading (?i); if so, set the flag and strip it var flags = (this.REs_.CASE_INSENSITIVE.test(res[i])) ? "i" : ""; res[i] = res[i].replace(this.REs_.CASE_INSENSITIVE, ""); res[i] = new RegExp(res[i], flags); } return res; } /** * Get the canonical version of the given URL for lookup in a table of * type -url. * * @param url String to canonicalize * * @returns String containing the canonicalized url (maximally url-decoded * with hostname normalized, then specially url-encoded) */ PROT_EnchashDecrypter.prototype.getCanonicalUrl = function(url) { var escapedUrl = PROT_URLCanonicalizer.canonicalizeURL_(url); // Normalize the host var host = this.getCanonicalHost(escapedUrl); if (!host) { // Probably an invalid url, return what we have so far. return escapedUrl; } // Combine our normalized host with our escaped url. var ioService = Cc["@mozilla.org/network/io-service;1"] .getService(Ci.nsIIOService); var urlObj = ioService.newURI(escapedUrl, null, null); urlObj.host = host; return urlObj.asciiSpec; } /** * @param opt_maxDots Number maximum number of dots to include. */ PROT_EnchashDecrypter.prototype.getCanonicalHost = function(str, opt_maxDots) { var ioService = Cc["@mozilla.org/network/io-service;1"] .getService(Ci.nsIIOService); try { var urlObj = ioService.newURI(str, null, null); var asciiHost = urlObj.asciiHost; } catch (e) { G_Debug(this, "Unable to get hostname: " + str); return ""; } var unescaped = this.hexDecode_(asciiHost); unescaped = unescaped.replace(this.REs_.FIND_DODGY_CHARS_GLOBAL, "") .replace(this.REs_.FIND_END_DOTS_GLOBAL, "") .replace(this.REs_.FIND_MULTIPLE_DOTS_GLOBAL, "."); var temp = this.parseIPAddress_(unescaped); if (temp) unescaped = temp; // TODO: what, exactly is it supposed to escape? This doesn't esecape // ":", "/", ";", and "?" var escaped = encodeURI(unescaped); if (opt_maxDots) { // Limit the number of dots var k; var index = escaped.length; for (k = 0; k < opt_maxDots + 1; k++) { temp = escaped.lastIndexOf(".", index - 1); if (temp == -1) { break; } else { index = temp; } } if (k == opt_maxDots + 1 && index != -1) { escaped = escaped.substring(index + 1); } } escaped = escaped.toLowerCase(); return escaped; } PROT_EnchashDecrypter.prototype.parseIPAddress_ = function(host) { host = host.replace(this.REs_.FIND_TRAILING_DOTS_GLOBAL, ""); if (!this.REs_.POSSIBLE_IP.test(host)) return ""; var parts = host.split("."); if (parts.length > 4) return ""; var allowOctal = !this.REs_.FIND_BAD_OCTAL.test(host); for (var k = 0; k < parts.length; k++) { var canon; if (k == parts.length - 1) { canon = this.canonicalNum_(parts[k], 5 - parts.length, allowOctal); } else { canon = this.canonicalNum_(parts[k], 1, allowOctal); } if (canon != "") parts[k] = canon; } return parts.join("."); } PROT_EnchashDecrypter.prototype.canonicalNum_ = function(num, bytes, octal) { if (bytes < 0) return ""; var temp_num; if (octal && this.REs_.IS_OCTAL.test(num)) { num = this.lastNChars_(num, 11); temp_num = parseInt(num, 8); if (isNaN(temp_num)) temp_num = -1; } else if (this.REs_.IS_DECIMAL.test(num)) { num = this.lastNChars_(num, 32); temp_num = parseInt(num, 10); if (isNaN(temp_num)) temp_num = -1; } else if (this.REs_.IS_HEX.test(num)) { num = this.lastNChars_(num, 8); temp_num = parseInt(num, 16); if (isNaN(temp_num)) temp_num = -1; } else { return ""; } if (temp_num == -1) return ""; // Since we mod the number, we're removing the least significant bits. We // Want to push them into the front of the array to preserve the order. var parts = []; while (bytes--) { parts.unshift("" + (temp_num % 256)); temp_num -= temp_num % 256; temp_num /= 256; } return parts.join("."); } PROT_EnchashDecrypter.prototype.getLookupKey = function(host) { var dataKey = PROT_EnchashDecrypter.DATABASE_SALT + host; dataKey = this.base64_.arrayifyString(dataKey); this.hasher_.init(G_CryptoHasher.algorithms.MD5); var lookupDigest = this.hasher_.updateFromArray(dataKey); var lookupKey = this.hasher_.digestHex(); return lookupKey.toUpperCase(); } PROT_EnchashDecrypter.prototype.decryptData = function(data, host) { // XXX: base 64 decoding should be done in C++ var asciiArray = this.base64_.decodeString(data); var ascii = this.base64_.stringifyArray(asciiArray); var random_salt = ascii.slice(0, PROT_EnchashDecrypter.SALT_LENGTH); var encrypted_data = ascii.slice(PROT_EnchashDecrypter.SALT_LENGTH); var temp_decryption_key = PROT_EnchashDecrypter.DATABASE_SALT + random_salt + host; this.hasher_.init(G_CryptoHasher.algorithms.MD5); this.hasher_.updateFromString(temp_decryption_key); var keyFactory = Cc["@mozilla.org/security/keyobjectfactory;1"] .getService(Ci.nsIKeyObjectFactory); var key = keyFactory.keyFromString(Ci.nsIKeyObject.RC4, this.hasher_.digestRaw()); this.streamCipher_.init(key); this.streamCipher_.updateFromString(encrypted_data); return this.streamCipher_.finish(false /* no base64 */); } /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Google Safe Browsing. * * The Initial Developer of the Original Code is Google Inc. * Portions created by the Initial Developer are Copyright (C) 2006 * the Initial Developer. All Rights Reserved. * * Contributor(s): * Tony Chang <tony@google.com> (original author) * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ /** * This class helps us batch a series of async calls to the db. * If any of the tokens is in the database, we fire callback with * true as a param. If all the tokens are not in the database, * we fire callback with false as a param. * This is an "Abstract" base class. Subclasses need to supply * the condition_ method. * * @param tokens Array of strings to lookup in the db * @param tableName String name of the table * @param callback Function callback function that takes true if the condition * passes. */ function MultiQuerier(tokens, tableName, callback) { this.tokens_ = tokens; this.tableName_ = tableName; this.callback_ = callback; this.dbservice_ = Cc["@mozilla.org/url-classifier/dbservice;1"] .getService(Ci.nsIUrlClassifierDBService); // We put the current token in this variable. this.key_ = null; } /** * Run the remaining tokens against the db. */ MultiQuerier.prototype.run = function() { if (this.tokens_.length == 0) { this.callback_.handleEvent(false); this.dbservice_ = null; this.callback_ = null; return; } this.key_ = this.tokens_.pop(); G_Debug(this, "Looking up " + this.key_ + " in " + this.tableName_); this.dbservice_.exists(this.tableName_, this.key_, BindToObject(this.result_, this)); } /** * Callback from the db. If the returned value passes the this.condition_ * test, go ahead and call the main callback. */ MultiQuerier.prototype.result_ = function(value) { if (this.condition_(value)) { this.callback_.handleEvent(true) this.dbservice_ = null; this.callback_ = null; } else { this.run(); } } // Subclasses must override this. MultiQuerier.prototype.condition_ = function(value) { throw "MultiQuerier is an abstract base class"; } /** * Concrete MultiQuerier that stops if the key exists in the db. */ function ExistsMultiQuerier(tokens, tableName, callback) { MultiQuerier.call(this, tokens, tableName, callback); this.debugZone = "existsMultiQuerier"; } ExistsMultiQuerier.inherits(MultiQuerier); ExistsMultiQuerier.prototype.condition_ = function(value) { return value.length > 0; } /** * Concrete MultiQuerier that looks up a key, decrypts it, then * checks the the resulting regular expressions for a match. * @param tokens Array of hosts */ function EnchashMultiQuerier(tokens, tableName, callback, url) { MultiQuerier.call(this, tokens, tableName, callback); this.url_ = url; this.enchashDecrypter_ = new PROT_EnchashDecrypter(); this.debugZone = "enchashMultiQuerier"; } EnchashMultiQuerier.inherits(MultiQuerier); EnchashMultiQuerier.prototype.run = function() { if (this.tokens_.length == 0) { this.callback_.handleEvent(false); this.dbservice_ = null; this.callback_ = null; return; } var host = this.tokens_.pop(); this.key_ = host; var lookupKey = this.enchashDecrypter_.getLookupKey(host); this.dbservice_.exists(this.tableName_, lookupKey, BindToObject(this.result_, this)); } EnchashMultiQuerier.prototype.condition_ = function(encryptedValue) { if (encryptedValue.length > 0) { // We have encrypted regular expressions for this host. Let's // decrypt them and see if we have a match. var decrypted = this.enchashDecrypter_.decryptData(encryptedValue, this.key_); var res = this.enchashDecrypter_.parseRegExps(decrypted); for (var j = 0; j < res.length; j++) { if (res[j].test(this.url_)) { return true; } } } return false; } /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Google Safe Browsing. * * The Initial Developer of the Original Code is Google Inc. * Portions created by the Initial Developer are Copyright (C) 2006 * the Initial Developer. All Rights Reserved. * * Contributor(s): * Fritz Schneider <fritz@google.com> (original author) * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ // This is the class we use to canonicalize URLs for TRTables of type // url. We maximally URL-decode the URL, treating +'s as if they're // not special. We then specially URL-encode it (we encode ASCII // values [0, 32] (whitespace or unprintable), 37 (%), [127, 255] // (unprintable)). // // This mapping is not a function. That is, multiple URLs can map to // the same canonical representation. However this is OK because // collisions happen only when there are weird characters (e.g., // nonprintables), and the canonical representation makes us robust // to some weird kinds of encoding we could see. // // All members are static at this point -- this is basically a namespace. /** * Create a new URLCanonicalizer. Useless because members are static. * * @constructor */ function PROT_URLCanonicalizer() { throw new Error("No need to instantiate a canonicalizer at this point."); } PROT_URLCanonicalizer.debugZone = "urlcanonicalizer"; PROT_URLCanonicalizer.hexChars_ = "0123456789ABCDEF"; /** * Helper funciton to (maybe) convert a two-character hex string into its * decimal numerical equivalent * * @param hh String of length two that might be a valid hex sequence * * @returns Number: NaN if hh wasn't valid hex, else the appropriate decimal * value */ PROT_URLCanonicalizer.hexPairToInt_ = function(hh) { return Number("0x" + hh); } /** * Helper function to hex-encode a number * * @param val Number in range [0, 255] * * @returns String containing the hex representation of that number (sans 0x) */ PROT_URLCanonicalizer.toHex_ = function(val) { var retVal = PROT_URLCanonicalizer.hexChars_.charAt((val >> 4) & 15) + PROT_URLCanonicalizer.hexChars_.charAt(val & 15); return retVal; } /** * Canonicalize a URL. DON'T USE THIS DIRECTLY. Use * PROT_EnchashDecrypter.prototype.getCanonicalUrl instead. This method * url-decodes a string, but it doesn't normalize the hostname. The method * in EnchashDecrypter first calls this method, then normalizes the hostname. * * @param url String to canonicalize * * @returns String containing the canonicalized url (maximally url-decoded, * then specially url-encoded) */ PROT_URLCanonicalizer.canonicalizeURL_ = function(url) { var arrayOfASCIIVals = PROT_URLCanonicalizer.fullyDecodeURL_(url); return PROT_URLCanonicalizer.specialEncodeURL_(arrayOfASCIIVals); } /** * Maximally URL-decode a URL. This breaks the semantics of the URL, but * we don't care because we're using it for lookup, not for navigation. * We break multi-byte UTF-8 escape sequences as well, but we don't care * so long as they canonicalize the same way consistently (they do). * * @param url String containing the URL to maximally decode. Should ONLY * contain characters with UCS codepoints U+0001 to U+00FF * (the ASCII set minus null). * * @returns Array of ASCII values corresponding to the decoded sequence of * characters in the url */ PROT_URLCanonicalizer.fullyDecodeURL_ = function(url) { // The goals here are: simplicity, correctness, and most of all // portability; we want the same implementation of canonicalization // wherever we use it so as to to minimize the chances of // inconsistency. For example, we have to do this canonicalization // on URLs we get from third parties, and at the lookup server when // we get a request. // // The following implementation should translate easily to any // language that supports arrays and pointers or references. Note // that arrays are pointer types in JavaScript, so foo = [some, // array] points foo at the array; it doesn't copy it. The // implementation is efficient (linear) so long as most %'s in the // url belong to valid escape sequences and there aren't too many // doubly-escaped values. // The basic idea is to copy current input to output, decoding escape // sequences as we see them, until we decode a %. At that point we start // copying into the "next iteration buffer" instead of the output buffer; // we do this so we can accomodate multiply-escaped strings. When we hit // the end of the input, we take the "next iteration buffer" as our input, // and start over. var nextIteration = url.split(""); var output = []; while (nextIteration.length) { var decodedAPercent = false; var thisIteration = nextIteration; var nextIteration = []; var i = 0; while (i < thisIteration.length) { var c = thisIteration[i]; if (c == "%" && i + 2 < thisIteration.length) { // Peek ahead to see if we have a valid HH sequence var asciiVal = PROT_URLCanonicalizer.hexPairToInt_(thisIteration[i + 1] + thisIteration[i + 2]); if (!isNaN(asciiVal)) { i += 2; // Valid HH sequence; consume it if (asciiVal == 0) // We special case nulls asciiVal = 1; c = String.fromCharCode(asciiVal); if (c == "%") decodedAPercent = true; } } if (decodedAPercent) nextIteration[nextIteration.length] = c; else output[output.length] = c.charCodeAt(0); ++i; } } return output; } /** * Maximally URL-decode a URL (same as fullyDecodeURL_ except that it * returns a string). Useful for making unittests more readable. * * @param url String containing the URL to maximally decode. Should ONLY * contain characters with UCS codepoints U+0001 to U+00FF * (the ASCII set minus null). * * @returns String containing the decoded URL */ PROT_URLCanonicalizer.fullyDecodeURLAsString_ = function(url) { var arrayOfASCIIVals = PROT_URLCanonicalizer.fullyDecodeURL_(url); var s = ""; for (var i = 0; i < arrayOfASCIIVals.length; i++) s += String.fromCharCode(arrayOfASCIIVals[i]); return s; } /** * Specially URL-encode the given array of ASCII values. We want to encode * the charcters: [0, 32], 37, [127, 255]. * * @param arrayOfASCIIValues Array of ascii values (numbers) to encode * * @returns String corresonding to the escaped URL */ PROT_URLCanonicalizer.specialEncodeURL_ = function(arrayOfASCIIValues) { var output = []; for (var i = 0; i < arrayOfASCIIValues.length; i++) { var n = arrayOfASCIIValues[i]; if (n <= 32 || n == 37 || n >= 127) output.push("%" + ((!n) ? "01" : PROT_URLCanonicalizer.toHex_(n))); else output.push(String.fromCharCode(n)); } return output.join(""); } /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Url Classifier code * * The Initial Developer of the Original Code is * Google Inc. * Portions created by the Initial Developer are Copyright (C) 2006 * the Initial Developer. All Rights Reserved. * * Contributor(s): * Tony Chang <tony@ponderer.org> * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ // XXX: This should all be moved into the dbservice class so it happens // in the background thread. /** * Abstract base class for a lookup table. * @construction */ function UrlClassifierTable() { this.debugZone = "urlclassifier-table"; this.name = ''; this.needsUpdate = false; this.enchashDecrypter_ = new PROT_EnchashDecrypter(); } UrlClassifierTable.prototype.QueryInterface = function(iid) { if (iid.equals(Components.interfaces.nsISupports) || iid.equals(Components.interfaces.nsIUrlClassifierTable)) return this; Components.returnCode = Components.results.NS_ERROR_NO_INTERFACE; return null; } /** * Subclasses need to implment this method. */ UrlClassifierTable.prototype.exists = function(url, callback) { throw Components.results.NS_ERROR_NOT_IMPLEMENTED; } ///////////////////////////////////////////////////////////////////// // Url table implementation function UrlClassifierTableUrl() { UrlClassifierTable.call(this); } UrlClassifierTableUrl.inherits(UrlClassifierTable); /** * Look up a URL in a URL table */ UrlClassifierTableUrl.prototype.exists = function(url, callback) { // PROT_URLCanonicalizer.canonicalizeURL_ is the old way of canonicalizing a // URL. Unfortunately, it doesn't normalize numeric domains so alternate IP // formats (hex, octal, etc) won't trigger a match. // this.enchashDecrypter_.getCanonicalUrl does the right thing and // normalizes a URL to 4 decimal numbers, but the update server may still be // giving us encoded IP addresses. So to be safe, we check both cases. var oldCanonicalized = PROT_URLCanonicalizer.canonicalizeURL_(url); var canonicalized = this.enchashDecrypter_.getCanonicalUrl(url); G_Debug(this, "Looking up: " + url + " (" + oldCanonicalized + " and " + canonicalized + ")"); (new ExistsMultiQuerier([oldCanonicalized, canonicalized], this.name, callback)).run(); } ///////////////////////////////////////////////////////////////////// // Domain table implementation function UrlClassifierTableDomain() { UrlClassifierTable.call(this); this.debugZone = "urlclassifier-table-domain"; this.ioService_ = Cc["@mozilla.org/network/io-service;1"] .getService(Ci.nsIIOService); } UrlClassifierTableDomain.inherits(UrlClassifierTable); /** * Look up a URL in a domain table * We also try to lookup domain + first path component (e.g., * www.mozilla.org/products). * * @returns Boolean true if the url domain is in the table */ UrlClassifierTableDomain.prototype.exists = function(url, callback) { var canonicalized = this.enchashDecrypter_.getCanonicalUrl(url); var urlObj = this.ioService_.newURI(canonicalized, null, null); var host = ''; try { host = urlObj.host; } catch (e) { } var hostComponents = host.split("."); // Try to get the path of the URL. Pseudo urls (like wyciwyg:) throw // errors when trying to convert to an nsIURL so we wrap in a try/catch // block. var path = "" try { urlObj.QueryInterface(Ci.nsIURL); path = urlObj.filePath; } catch (e) { } var pathComponents = path.split("/"); // We don't have a good way map from hosts to domains, so we instead try // each possibility. Could probably optimize to start at the second dot? var possible = []; for (var i = 0; i < hostComponents.length - 1; i++) { host = hostComponents.slice(i).join("."); possible.push(host); // The path starts with a "/", so we are interested in the second path // component if it is available if (pathComponents.length >= 2 && pathComponents[1].length > 0) { host = host + "/" + pathComponents[1]; possible.push(host); } } // Run the possible domains against the db. (new ExistsMultiQuerier(possible, this.name, callback)).run(); } ///////////////////////////////////////////////////////////////////// // Enchash table implementation function UrlClassifierTableEnchash() { UrlClassifierTable.call(this); this.debugZone = "urlclassifier-table-enchash"; } UrlClassifierTableEnchash.inherits(UrlClassifierTable); /** * Look up a URL in an enchashDB. We try all sub domains (up to MAX_DOTS). */ UrlClassifierTableEnchash.prototype.exists = function(url, callback) { url = this.enchashDecrypter_.getCanonicalUrl(url); var host = this.enchashDecrypter_.getCanonicalHost(url, PROT_EnchashDecrypter.MAX_DOTS); var possible = []; for (var i = 0; i < PROT_EnchashDecrypter.MAX_DOTS + 1; i++) { possible.push(host); var index = host.indexOf("."); if (index == -1) break; host = host.substring(index + 1); } // Run the possible domains against the db. (new EnchashMultiQuerier(possible, this.name, callback, url)).run(); } //@line 47 "/cygdrive/c/builds/tinderbox/Fx-Mozilla1.8-release/WINNT_5.2_Depend/mozilla/toolkit/components/url-classifier/src/nsUrlClassifierTable.js" var modScope = this; function Init() { // Pull the library in. var jslib = Cc["@mozilla.org/url-classifier/jslib;1"] .getService().wrappedJSObject; modScope.G_Preferences = jslib.G_Preferences; modScope.G_PreferenceObserver = jslib.G_PreferenceObserver; modScope.G_Debug = jslib.G_Debug; modScope.G_CryptoHasher = jslib.G_CryptoHasher; modScope.G_Base64 = jslib.G_Base64; modScope.BindToObject = jslib.BindToObject; // We only need to call Init once. modScope.Init = function() {}; } function UrlClassifierTableMod() { this.components = {}; this.addComponent({ cid: "{43399ee0-da0b-46a8-9541-08721265981c}", name: "UrlClassifier Table Url Module", progid: "@mozilla.org/url-classifier/table;1?type=url", factory: new UrlClassifierTableFactory(UrlClassifierTableUrl) }); this.addComponent({ cid: "{3b5004c6-3fcd-4b12-b311-a4dfbeaf27aa}", name: "UrlClassifier Table Domain Module", progid: "@mozilla.org/url-classifier/table;1?type=domain", factory: new UrlClassifierTableFactory(UrlClassifierTableDomain) }); this.addComponent({ cid: "{04f15d1d-2db8-4b8e-91d7-82f30308b434}", name: "UrlClassifier Table Enchash Module", progid: "@mozilla.org/url-classifier/table;1?type=enchash", factory: new UrlClassifierTableFactory(UrlClassifierTableEnchash) }); } UrlClassifierTableMod.prototype.addComponent = function(comp) { this.components[comp.cid] = comp; }; UrlClassifierTableMod.prototype.registerSelf = function(compMgr, fileSpec, loc, type) { compMgr = compMgr.QueryInterface(Ci.nsIComponentRegistrar); // Register all the components for (var cid in this.components) { var comp = this.components[cid]; compMgr.registerFactoryLocation(Components.ID(comp.cid), comp.name, comp.progid, fileSpec, loc, type); } }; UrlClassifierTableMod.prototype.getClassObject = function(compMgr, cid, iid) { var comp = this.components[cid.toString()]; if (!comp) throw Components.results.NS_ERROR_NO_INTERFACE; if (!iid.equals(Ci.nsIFactory)) throw Components.results.NS_ERROR_NOT_IMPLEMENTED; return comp.factory; }; UrlClassifierTableMod.prototype.canUnload = function(compMgr) { return true; }; /** * Create a factory. * @param ctor Function constructor for the object we're creating. */ function UrlClassifierTableFactory(ctor) { this.ctor = ctor; } UrlClassifierTableFactory.prototype.createInstance = function(outer, iid) { if (outer != null) throw Components.results.NS_ERROR_NO_AGGREGATION; Init(); return (new this.ctor()).QueryInterface(iid); }; var modInst = new UrlClassifierTableMod(); function NSGetModule(compMgr, fileSpec) { return modInst; }