`
`Symantec 1005
`IPR of U.S. Pat. No. 7,757,298
`
`
`
`U.S. Patent
`
`Sep. 19, 2000
`
`Sheet 1 of 4
`
`6,122,657
`
`104
`
`KEYBOARD
`
`POINTING
`
`DEVICE
`
`DISPLAY
`
`SCREEN
`
`MASS
`STORAGE
`
`108
`
`OUTPUT
`
`DEVICE
`
`111
`
`
`
`NETWORK
`
`CONTROLLER
`
`(e.g., ETHERNET)
`
`I/O
`
`CONTROLLER
`
`MODEM
`
`110
`
`FIG. 1
`
`000002
`
`102
`
`MAIN
`
`MEMORY
`
`101
`
`CENTRAL
`
`PROCESSOR
`
`CACHE
`MEMORY
`
`109
`
`000002
`
`
`
`3U
`
`wa
`
`S
`
`4M
`
`21,6
`
`7.®
`
`w@_>_<meoEm_,_o:<o:&<
`
`pgum
`
`mEzmmcz
`
`zocomzzoo
`
`2,<mem
`
`«mm:
`
`
`
`Msm:m>mozcfimaomea
`
`
`
`
`
`mo<&Ez_m\%.mn_:z.,w_>>zo_»%_\_M__m_.h\_,__\,_oom$2.m
`
`000003
`
`
`
`U.S. Patent
`
`M3m
`
`6,122,657
`
`4mmGE
`
`MzozomzzooMEZEE&SE
`
`4m
`
`000004
`
`
`
`
`
`.whS
`
`04
`
`756,221,6
`
`Mso
`
`zo:<Em_z_s_o<
`
`mGE
`
`tHCtaP3U
`
`oz_=E<%_o
`
`n_OO._
`
`n..
`
`&an
`
`
`
`Emgommw,oRoz_$::E
`
`
`x8m2§5n_z_053$85&8mTull_>_<mEmas_2<mEm+I:||I|vmm.
`
`M
`
`000005
`
`
`
`
`6,122,657
`
`1
`INTERNET COMPUTER SYSTEM WITH
`METHODS FOR DYNAMIC FILTERING OF
`HYPERTEXT TAGS AND CONTENT
`
`The present application claims priority from commonly-
`owned provisional patent application Ser. No. 60/037,161,
`filed Feb. 4, 1997, entitled Internet Computer System With
`Methods For Dynamic Filtering Of Hypertext Tags And
`Content, and listing co-inventors of Gene Hoffman, Jr. and
`Mark Elrod, the disclosure of which is hereby incorporated
`by reference.
`
`COPYRIGHT NOTICE
`
`A portion of the disclosure of this patent document
`contains material which is subject to copyright protection.
`The copyright owner has no objection to the facsimile
`reproduction by anyone of the patent document or the patent
`disclosure as it appears in the Patent and Trademark Office
`patent file or records, but otherwise reserves all copyright
`rights whatsoever.
`BACKGROUND OF THE INVENTION
`
`The present application relates generally to use of a
`computer with the Internet and, more particularly, methods
`for speeding up the process of browsing Web content in a
`computer system having an Internet or other on-line
`browser.
`
`With the ever-increasing popularity of the Internet, par-
`ticularly the World Wide Web (“Web”) portion of the
`Internet, more and more personal computers (PC’s) provide
`Internet access to vast stores of information through Web
`“browsers” (e.g., Microsoft Internet Explorer or Netscape
`Navigator) or other “Internet applications.” Browsers and
`other Internet applications include the ability to access a
`URL (Universal Resource Locator) or “Web” site. The URL
`is used to specify the location of a file held on a remote
`machine.
`
`Each URL itself is composed of several distinct compo-
`nents. For example, the URL http://host/file.html includes
`three distinct components. The first component, http, speci-
`fies the protocol (here, “HTTP” or HyperText Transfer
`Protocol) that is to be used to access the target file. Other
`access protocols can be specified by a URL. For example,
`the URL of ftp://ftp.pgp.com/pub/docs/samples specifies
`access to files via “FTP” (File Transfer Protocol). This
`specifies a link for accessing the file directory docs/samples
`on the machine ftp.pgp.com.
`The second component, host, indicates the name of the
`remote machine; this can be expressed as either a symbol
`name (e.g., pgp.com) or a numeric IP (Internet Protocol)
`address such as 123.200.1.1. The final component, file.html,
`provides the path name of the target file—that is, the file to
`which the hypertext link is to be made. The file is referenced
`relative to the base directory in which Web pages are held;
`the location of this directory is specified by the person who
`has set up the Web server (i.e., “Webmaster”).
`The majority of content available on the Internet is
`represented in “HTML” documents which, in turn, are read
`or accessed by Web browsers. In particular, the HTML or
`Hypertext Markup Language is the scripting language used
`to create the documents for the World Wide Web. Although
`most browsers will display any document that is written in
`plain text, HTML documents afford several advantages. In
`particular, HTML documents include formatting, graphics,
`and “hypertext links” to other documents.
`Markup languages are used to describe the structure of the
`document. HTML is used to mark various elements in a
`
`10
`
`15
`
`20
`
`25
`
`30
`
`35
`
`40
`
`45
`
`50
`
`55
`
`60
`
`65
`
`2
`document, including headings, paragraphs, lists, tables, and
`the like. To achieve this, an HTML document
`includes
`formatting commands or “tags” embedded within the text of
`the document which serve as commands to a browser. Here,
`HTML tags mark the elements of a file for browsers.
`Elements can contain plain text, other elements, or both. The
`browser reading the document interprets these markup tags
`or commands to help format the document for subsequent
`display to a user. The browser thus displays the document
`with regard to features that the viewer selects either explic-
`itly or implicitly. Factors affecting the layout and presenta-
`tion include, for instance, the markup tags used, the physical
`page width available, and the fonts used to display the text.
`The design of HTML tags is relatively simple. Individual
`HTML tags begin with a < (“less than”) character and end
`with a > (“greater than”) character, such as <title> which
`serves to identify text which follows as the title of a
`document. HTML tags are not case-sensitive (with the
`exception of HTML escape sequences) and are often used in
`symmetric pairs, with the final tag indicated by the inclusion
`of a / (slash) character. For instance, the <title> tag represents
`a beginning tag which would be paired with a </title> ending
`tag. These paired commands would thus be applied to the
`text contained within the beginning and ending commands,
`such as <title> My Sample Title </title>. The <B> tag, on the
`other hand, informs browsers that the text which follows is
`to be in bold type. This bolding is turned off by the inverse
`markup tag </B>. In contrast to these paired or “container”
`tags, separator tags are used unpaired. For example, the
`command <br> is employed by itself to insert a line break.
`Browsers generally ignore extra spaces and new lines
`between words and markup tags when reading the docu-
`ment. In other words, “white space” characters, such as tabs,
`spaces, and new line characters, are generally ignored in
`HTML. Leaving a blank line in one’s document,
`for
`instance, generally does not create a blank line when the
`document is displayed in a browser, unless one uses the
`“preformatted” HTML tag (<pre> and </pre>). Finally, not
`all tags are supported by all Web browsers. If a browser does
`not support a tag, it (usually) just ignores it.
`The attraction of the World Wide Web is of course the
`
`“rich” content which it stores, largely as a collection of these
`interconnected Web or HTML pages. With each passing day,
`the information content available on the Web is more and
`
`more graphical in nature (e.g., high use of bitmaps). Accom-
`panying the explosive growth of the World Wide Web, for
`instance, is the ever increasing use of advertising material on
`practically any content which a user can access. This is
`particularly problematic since advertising material is often
`graphically intensive,
`requiring substantial
`time and
`resources for downloading and processing. Apart
`from
`advertising, many Web sites employ graphics to such an
`extreme degree as to render it difficult or impractical to
`access the Web site in real-time unless one has a high-speed
`Internet connection (e.g., T1 line). All told, the total down-
`load times for Web pages are becoming increasingly greater.
`At the same time, the underlying infrastructure of the Web
`has not
`improved to a sufficient degree to offset
`this
`increased resource demand. Although advertising on the
`Web serves as one example, there exists a more general
`problem of how a user of the Web can exert at least some
`control over the content which is downloaded into his or her
`
`browser. Accordingly, there is great interest in developing
`techniques which speed up the process of browsing Web
`content or “Web surfing,” including decreasing the back-
`ground noise (e.g., ancillary graphics) which is not desired
`by the user.
`
`000006
`
`000006
`
`
`
`6,122,657
`
`3
`SUMMARY OF THE INVENTION
`
`An Internet computer system with methods for dynamic
`filtering of hypertext tags and content is described. The
`system includes one or more Web clients, each with an
`Internet connection to one or more Web servers. An exem-
`
`plary Web client comprises a personal computer or work-
`station operating a Web browser (e.g., Netscape Navigator
`or Microsoft Internet Explorer) which communicates with
`the Internet via a communication layer, such as Microsoft
`Winsock (Winsock.dll)—a Windows implementation of
`Transmission Control Protocol/Internet Protocol
`(TCP/IP).
`At each client,
`interposed (functionally) between the
`browser and the communication layer is a Filter module of
`the present invention. In this fashion, the Filter module can
`trap and process all communications between the browser
`and the communication layer. For a client employing a
`Winsock 2 communication driver,
`the Filter module can
`register itself with the Winsock driver directly and, thereby,
`provide trapping and processing of communication in a
`manner which has the native support of the driver.
`The Filter module, which implements client-side meth-
`odology at each individual Web client for dynamic filtering
`of hypertext tags and content, includes an output stream, a
`processing loop, a Filter method, and an input stream. For
`assisting with user configuration of its operation, the Filter
`module also includes a graphical user interface (GUI)
`administration module. The input stream is responsible for
`getting input; it interfaces directly with the Winsock com-
`munication driver. In a corresponding manner, the output
`stream communicates with the (client) browser; it is respon-
`sible for providing output to the browser which is to be
`ultimately rendered on screen for the user. Accordingly, the
`output stream represents the data pool right before it is sent
`to the browser. The Filter method, on the other hand,
`represents the workhorse method or core logic for perform-
`ing the filtering.
`At a high level, the Filter module operates as follows. The
`Web browser retrieves content by generating requests for
`content. More particularly, a fetch or GET request or com-
`mand (e.g., using HTTP protocol) is issued through the
`Winsock communication driver, for example, for fetching
`particular content (e.g., bitmaps) specified by a Web page.
`The command is, however, first trapped by the Filter mod-
`ule. The “real” request or command is at this point processed
`by the Filter method. At the level of the Filter method, the
`system can modify the command, delete the command,
`synthesize new commands, or pass through unchanged the
`existing command, thereby impacting how the system ren-
`ders Web pages. In an exemplary embodiment, the Filter
`method provides handlers for specific processing of various
`HTML tags, all operating according to user-configurable
`filtering preferences.
`
`BRIEF DESCRIPTION OF THE DRAWINGS
`
`FIG. 1 is a block diagram illustrating a computer system
`in which the present invention may be embodied.
`FIG. 2A is a block diagram of a computer software system
`for controlling the operation of the computer system of FIG.
`1.
`
`FIG. 2B is a block diagram illustrating implementation of
`a client-side approach to HTML filtering, at each individual
`Web client.
`
`FIG. 3 is a block diagram illustrating detailed internal
`architecture of a Filter module of the present invention.
`
`10
`
`15
`
`20
`
`25
`
`30
`
`35
`
`40
`
`45
`
`50
`
`55
`
`60
`
`65
`
`4
`DETAILED DESCRIPTION OF A PREFERRED
`EMBODIMENT
`
`The following description will focus on the presently-
`preferred embodiment of the present invention, which is
`operative in an Internet application (e.g., Web browser)
`running under the Microsoft® Windows environment. The
`present invention, however, is not limited to any particular
`one application or any particular environment. Instead, those
`skilled in the art will find that the system and methods of the
`present invention may be advantageously applied to a vari-
`ety of system and application software, including database
`management systems, word processors, spreadsheets, and
`the like. Moreover, the present invention may be embodied
`on a variety of different platforms, including Macintosh,
`UNIX, NextStep, and the like. Therefore, the description of
`the exemplary embodiments which follows is for purposes
`of illustration and not limitation.
`
`System Hardware
`The invention may be embodied on a computer system
`such as the system 100 of FIG. 1, which comprises a central
`processor 101, a main memory 102, an input/output con-
`troller 103, a keyboard 104, a pointing device 105 (e.g.,
`mouse, track ball, pen device, or the like), a display or screen
`device 106, and a mass storage 107 (e.g., hard or fixed disk,
`removable floppy disk, optical disk, magneto-optical disk, or
`flash memory), a network interface card or controller 111
`(e.g., Ethernet), and a modem 112 (e.g., 28.8K baud modem
`or ISDN modem). Although not shown separately, a real-
`time system clock is included with the system 100, in a
`conventional manner. Processor 101 includes or is coupled
`to a cache memory 109 for storing frequently accessed
`information; memory 109 may be an on-chip cache or
`external cache (as shown). One or more input/output
`device(s) 108, such as a printing device or slide output
`device, are included in the system 100, as desired. As shown,
`the various components of the system 100 communicate
`through a system bus 110 or similar architecture. The system
`itself communicates with other systems via a network inter-
`face card 111 (e.g., available from 3Com) and/or modem 112
`(e.g., available from U.S. Robotics).
`In a preferred
`embodiment,
`the system 100 includes an IBM
`PC-compatible personal computer, available from a variety
`of vendors (including IBM of Armonk, N.Y.).
`I/O device 108
`may include a laser printer, such as an HP Laserjet printer,
`which is available from Hewlett-Packard of Palo Alto, Calif.
`System Software
`Illustrated in FIG. 2A, a computer software system 220 is
`provided for directing the operation of the computer system
`100. Software system 220, which is stored in system
`memory 102 and on storage (e.g., disk memory) 107,
`includes a kernel or operating system (OS) 240 and a
`windows shell 250. One or more application programs, such
`as client application software or “programs” 245 may be
`“loaded” (i.e., transferred from storage 107 into memory
`102)
`for execution by the system 100.
`In a preferred
`embodiment, client application software includes a Web
`browser (e.g., Netscape Navigator or Microsoft Internet
`Explorer) which communicates through a communication
`layer or driver 241 (e.g., Winsock) with the Internet.
`System 220 includes a user interface (UI) 260, preferably
`a Graphical User Interface (GUI), for receiving user com-
`mands and data. These inputs, in turn, may be acted upon by
`the system 100 in accordance with instructions from oper-
`ating module 240, windows 250, and/or client application
`module(s) 245. The UI 260 also serves to display the results
`of operation from the OS 240, windows 250, and
`application(s) 245, whereupon the user may supply addi-
`
`000007
`
`000007
`
`
`
`6,122,657
`
`5
`tional inputs or terminate the session. OS 240 and windows
`250 can be provided by Microsoft® Windows 95, by
`Microsoft® Windows NT, or by Microsoft® Windows 3.x
`(operating in conjunction with MS-DOS); these are avail-
`able from Microsoft Corporation of Redmond, Wash.
`Alternatively, OS 240 and windows 250 can be provided by
`IBM OS/2 (available from IBM of Armonk, N.Y.) or Macin-
`tosh OS (available from Apple Computers of Cupertino,
`Calif.). Although shown conceptually as a separate module,
`the UI is typically provided by interaction of the application
`modules with the windows shell, both operating under OS
`240.
`
`Of particular interest, the system 220 includes a Filter
`module 225 of the present invention. The Filter 225 manages
`content of Web pages downloaded from the Internet (or
`Intranet) by a user, according to user-configurable filter
`settings. Construction and operation of the Filter 225 of the
`present invention will now be described in further detail.
`Methodology for dynamic filtering of HTML tags and
`content
`A. HTML documents and client/server HTTP communi-
`cation
`
`Before describing construction of the Filter module of the
`present invention in further detail, it is helpful to first review
`in further detail both the internal organization of HTML
`documents and the operation of HTTP protocol for trans-
`mitting HTML documents to Web clients.
`1. Organization of HTML documents
`Every HTML document requires certain standard HTML
`tags in order to be correctly interpreted by Web browsers.
`Each document consists of head and body text. The head
`contains the title, and the body contains the actual text that
`is made up of paragraphs, lists, and other elements. Brows-
`ers expect specific information because they are pro-
`grammed according to HTML and SGML specifications.
`The basic layout of an HTML document,
`including
`required elements,
`is illustrated in the following simple
`HTML document.
`<HTML>
`
`<HEAD>
`
`<TITLE>Title of the Web page </TITLE>
`</HEAD>
`
`<BODY>
`
`An example of a simple
`<B>Web</B>
`page.
`</BODY>
`
`</HTML>
`As illustrated, required elements include the <HTML>,
`<HEAD>, <TITLE>, and <BODY> tags, together with any
`corresponding end tags. The tags used function as follows.
`The first pair of tags, <HTML></HTML>, defines the extent
`of the HTML markup text. The <HEAD></HEAD> tag pair
`contains descriptions of the HTML page; this meta infor-
`mation is not displayed as part of the Web page. The
`<TITLE></TITLE> tag pair describes the title of the page.
`This description is usually displayed by the browser as the
`title of the window in which the Web page is displayed. This
`information is also used by some search engines to compile
`an index of Web pages. The next tag pair, <BODY></
`BODY>, delimits the body of the Web page. In the body is
`the text to be displayed as well as HTML markup tags to hint
`at the format of the text. For example, the <B></B> tag pair
`displays the enclosed text
`in a bold typeface. Further
`
`6
`description of HTML documents is available in the technical
`and trade literature; see e.g., Ray Duncan, Power Program-
`ming: An HTML Primer; PC Magazine, Jun. 13, 1995, the
`disclosure of which is hereby incorporated by reference.
`2. HTTP communication
`HTTP is the foundation of the World Wide Web. This
`
`request/response protocol used on top of TCP (Transmission
`Control Protocol) carries commands from browsers to serv-
`ers and responses from servers back to browsers. HTTP is
`a protocol not for transferring hypertext per se, but for
`transmitting information with the efficiency necessary to
`make hypertext jumps. The data transferred by the protocol
`can be plain text, hypertext, audio, images, or any Internet-
`accessible information.
`
`HTTP is a transaction-oriented client/server protocol; it
`treats each transaction independently. A typical implemen-
`tation creates a new TCP connection between a client and a
`server for each transaction, then terminates the connection as
`soon as the transaction completes. Since the protocol does
`not require this one-to-one relationship between transaction
`and connection lifetimes, however, the connection can stay
`open so that more transactions can be made.
`The transaction-based approach of HTTP is well-suited to
`its typical application. A normal Web session involves
`retrieving a sequence of pages and documents. The sequence
`is,
`ideally, performed rapidly, and the locations of the
`various pages and documents may be widely distributed
`among a number of servers, located across the country or
`around the globe.
`In typical HTTP configuration, a client, such as a Web
`browser, initiates a request (HTTP message) for a resource,
`for instance, from a Web server where a desired home page
`is located. The client opens a connection that is an
`end-to-end or direct connection between the client and the
`
`server. The client then issues an HTTP request. The request
`consists of a specific command (referred to as a method), a
`URL, and a message containing request parameters, infor-
`mation about
`the client, and perhaps additional content
`information. When the server receives the request,
`it
`attempts to perform the requested action and returns an
`HTTP response. The response includes status information, a
`success/error code, and a message containing information
`about the server, information about the response itself, and
`possible body content. The TCP connection is then closed.
`Instead of the end-to-end TCP connection between a
`
`client and a server, an alternative configuration employs one
`or more intermediary systems with TCP connections
`between (logically) adjacent systems. Each intermediary
`system acts as a relay, so that a request initiated by the client
`is relayed through the intermediary system(s) to the server,
`and the response from the server is relayed back to the client.
`A “proxy,” for example, is an intermediary system which
`acts on behalf of other clients and presents requests from
`other clients to a server. There are several scenarios that call
`
`for the use of a proxy. In one scenario, the proxy acts as an
`intermediary through a firewall. In this case, the server must
`authenticate itself to the firewall to set up a connection with
`the proxy. The proxy accepts responses after they have
`passed through the firewall.
`Clients and servers communicate using two types of
`HTTP messages: request and response. A request message is
`sent by a client to a server to initiate some action. Exemplary
`actions include the following.
`
`10
`
`15
`
`20
`
`25
`
`30
`
`35
`
`40
`
`45
`
`50
`
`55
`
`60
`
`65
`
`000008
`
`000008
`
`
`
`6,122,657
`
`C. Filter module
`
`1. Internal architecture
`
`GET: A request to fetch or retrieve information.
`POST: A request to accept the attached entity as a new
`subordinate to the identified URL.
`PUT: A request to accept the attached entity and store it
`under the supplied URL. This may be a new resource with
`a new URL, or it may be a replacement of the contents of
`an existing resource with an existing URL.
`DELETE: Requests that the origin server delete a resource.
`
`The server, in response to a request, returns a response
`message. A response message may include an entity body
`containing hypertext-based information.
`In addition,
`the
`response message must specify a status code, which indi-
`cates the action taken on the corresponding request. Status
`codes are organized into the following categories:
`
`INFORMATIONAL: The request has been received and processing
`continues. No entity body accompanies this
`response.
`SUCCESSFUL: The request was successfully received,
`understood, and accepted.
`REDIRECTION: Further action is required to complete the request.
`CLIENT ERROR: Request contains a syntax error or request cannot
`be fulfilled.
`SERVER ERROR: The server failed to fulfill an apparently valid
`request.
`
`Further description of HTTP is available in the technical and
`trade literature; see e.g., William Stallings, The Backbone of
`the Web, BYTE, October 1996, the disclosure of which is
`hereby incorporated by reference. As the explosive growth
`of the Web continues, and as new features are added to both
`browsers and servers, a standardized transfer protocol is
`essential
`to maintain the Web’s growing functions and
`interoperability. HTTP provides the standardized definition
`required to meet these needs.
`B. Approaches to filtering
`One approach to addressing the problem of spurious Web
`content is to adopt a server-based solution. For instance, one
`approach would be to employ a “proxy server” with the
`capability to perform text-based parsing (e.g., using PERL
`or AWK text processing). That approach is problematic,
`however. In particular, the proxy server (or set of proxy
`servers) presents a bottleneck through which Web content
`would have to be funneled. In addition to resource limita-
`
`tions (e.g., limited bandwidth), the approach also raises the
`issue of content ownership, such as the issue of mirror
`storage of copyrighted content at a proxy server.
`A better approach, therefore, is a client-side solution, one
`which can be implemented at each individual Web client.
`FIG. 2B is a block diagram illustrating the approach. The
`figure illustrates a Web client 245a with an Internet connec-
`tion to one or more Web servers 280. More particularly, the
`client 245a comprises a Web browser (e.g., Netscape Navi-
`gator or Microsoft Internet Explorer) operating on a personal
`computer (e.g., system 100) or workstation which commu-
`nicates with the Internet via a communication layer 241,
`such as Microsoft Winsock (Winsock.dll)—a Windows
`implementation of Transmission Control Protocol/Internet
`Protocol (TCP/IP). Interposed (functionally) between the
`browser 245a and the communication layer 241 is the Filter
`module 225 of the present invention. In this fashion, the
`Filter module 225 can trap and process all communications
`between the browser 245a and the communication layer
`241. With the advent of Winsock 2 (Microsoft Corp. of
`Redmond, Wash.), a third party module can register itself
`with the Winsock driver and,
`thereby,
`trap and process
`communication in a manner which has the support of the
`driver.
`
`FIG. 3 is a block diagram illustrating detailed internal
`architecture of the Filter module 225. As shown, the Filter
`module 225 includes an output stream 301, a dispatching
`loop 311, Filter logic 313, and an input stream 321. For
`assisting with user configuration of its operation, the Filter
`module 225 includes a graphical user interface (GUI)
`administration module 325. The input stream 321 is respon-
`sible for getting input; it interfaces directly with the Winsock
`communication driver. In a corresponding manner, the out-
`put stream 301 communicates with the (client) browser; it is
`responsible for providing output to the browser which is to
`be ultimately rendered on screen for the user. Accordingly,
`the output stream 301 represents the data pool right before
`it is sent to the browser. The Filter logic 313, on the other
`hand, represents the workhorse or core module for perform-
`ing the actual filtering. Its functionality is described in
`further detail below.
`
`At a high level, the module 225 operates as follows. The
`Web browser operates by generating requests for content,
`both for retrieving an initial Web page as well as for
`retrieving objects (e.g., bitmaps) intended for display on the
`page.
`In operation,
`the system issues fetch or GET
`commands, which are communicated to the server via the
`communication driver. Any such command is, however,
`trapped by the Filter module 225. The “real” request is at this
`point processed by the Filter core logic (Filter method) 313.
`At this point, the system can modify the command, delete
`the command, synthesize new commands, or pass through
`unchanged the existing command.
`2. Filter construction
`
`The detailed construction of the Filter core logic 313 is as
`follows. The Filter is implemented as a C language routine
`having an internal message or dispatcher loop which
`“switches” on different (HTML) tag types. Based on the
`particular tag being processed, the loop in turn dispatches
`the information to a particular handler, for performing the
`desired processing on that HTML tag type. As an example
`of this approach, consider, for instance, the processing of an
`image tag type. Upon encountering an image tag, the system
`dispatches the tag to the appropriate handler, an image tag
`handler. At
`this point,
`the handler can now proceed to
`process the information. For instance, the image handler
`could determine whether the image tag includes a reference
`to material which is to be filtered (e.g.,
`the image tag
`references an image stored in a “/ad/” directory). In the event
`that the handler “kills” the tag, the system employs a “kill”
`routine for correctly managing the buffer, including incre-
`menting the current buffer position beyond the tag (so that
`it can locate the next tag). Alternatively, the system can
`synthesize new tags, or pass through unchanged the existing
`tags.
`Certain tag types require more complex processing.
`Consider, for instance, a href or “hyper reference” tag type,
`which is employed for establishing a hyperlink. An instruc-
`tion to kill an href tag is, instead, an instruction to kill the
`image contained within the href tag. Accordingly, the cor-
`responding handler must include logic not for killing the
`href tag but, instead, for setting a status flag indicating that
`the system should cycle through (in the dispatcher loop) the
`tag and kill the image tag contained within the href tag.
`3. Filter methodology
`In accordance with the present invention, the Filter com-
`prises a core routine—the Filter method—for providing
`filtering functionality. In an exemplary embodiment,
`the
`Filter method may be constructed as follows (e.g., in the C
`programming language).
`
`10
`
`15
`
`20
`
`25
`
`35
`
`40
`
`45
`
`50
`
`55
`
`60
`
`65
`
`000009
`
`000009
`
`
`
`6,122,657
`
`10
`
`int Filter (char *Buffer, char *BaseURL, int BuffSize, BOOL CRCOn, BOOL *
`INJavaScript, BOOL * KillNeXtIMG)
`
`{i
`
`nt decrernent,val; // How much for the next buffer receive
`char * Found; // Where was it found
`char * IMGFound; // Where was it found
`char * BlinkFound; // Where was it found
`char * JavaFound;
`char * ScriptFound;
`char * HREFFound;
`char * Full_Tag;
`char * Left_Less_Than; // Keep up with that left greater than...
`char * Right Greater_Than; // Keep up with the other tag...
`char * Found_IMG_URL; // found img uri..
`char * THE_END = Buffer + BuffSize; // The real end of the buffer
`char * getstring;
`int Length;
`int count=0;
`int DoKill; // Are We at the end of the buffer
`pURL irngURL_struct;
`pURL Base_URL;
`DWORD Xor; // DWORD to do the XOR cornpare
`SOCKET hControlChannel; // Socket handle for the control channel
`HFILE hFile; // Handle for gif download save file
`BOOL Bad_Tag;
`Chunk imgloc;
`Chunk javabinloc;
`int imgsize;
`
`.°?:‘.°Y*.‘{‘.4?E'.°!‘.’!‘.‘
`
`irngURL_struct = (pURL) malloc (sizeof (URL));
`if ( irngURL_struct == NULL)
`
`{M
`
`essageBoX(NULL, “Malloc Failed in Filter(),
`irngURL_struct nEXpect a Crashl”,
`“Malloc Failed”, MB,OK|MB,ICONSTOP);
`
`}B
`
`ase_URL = (PURL) malloc (sizeof (URL));
`if ( Base URL == NULL)
`
`{M
`
`essageBoX(NULL, “Malloc Failed in Filter(), Base_URL nEXpect a Crashl”,
`“Malloc Failed”, MB,OK|MB,ICONSTOP);
`
`} /
`
`/ SpaWnonOff = 1; /* use for testing */
`
`////////////////////////////////////////////////////////////////
`//
`// In Java Script situation stuff
`//
`////////////////////////////////////////////////////////////////
`
`if (*INJavaScript)
`
`{ S
`
`criptFound = strstri (Buffer, “/script”);
`if (ScriptFound == NULL) // didn’t find the end
`
`{R
`
`ight_Greater_Than = (char *)rnernchr(Buffer, ‘<’, (THE_END -
`Buffer));
`if (Right_Greater_Than == NULL)
`
`{d
`
`ecrernent_val = (THE_END - Buffer);
`*INJavaScript = FALSE;
`if (Base_URL)
`free (Base_URL);
`if (irngURL_struct)
`free (irngURL_struct);
`return decrernentfval; //RETURN
`
`}B
`
`uffer = Right_Greater_Than - 1;
`#ifdef scriptd
`vErrorOut (fg_pink, “INJavaScript = False due to no Found /script and a
`found > n”);
`#endif
`*INJavaScript = FALSE;
`}else
`{
`
`000010
`
`000010
`
`
`
`11
`
`-continued
`
`6,122,657
`
`12
`
`#ifdef scriptd
`VErrorOut (fg_pink, “INJaVaScript = False due found /script n”);
`#endif
`*INJaVaScript = FALSE;
`Buffer = ScriptFound + 7; // just move on past the /script...
`}
`
`}/
`
`///////////////////////////////////////////////////////////////
`
`While (TRUE)
`
`{ X
`
`or = 0;
`imgsize = 0;
`Found = NULL;
`IMGFound = NULL;
`BlinkFound = NULL;
`JaVaFound = NULL;
`HREFFound = NULL;
`ScriptFound = NULL;
`Full_Tag = NULL;
`Bad_Tag = FALSE;
`Left_Less_Than = (char *)rnernchr (Buffer, ‘<’, (THE_END - Buffer + 1));
`if (Left_Less_Than == NULL)
`/* Can’t find a tag in the text at all -- This means We are done */
`
`{i
`
`f (Base URL)
`free (Base_URL);
`if (IrngURL_struct)
`free (irngURL_struct);
`
`return 0; //RETURN
`
`} i
`
`f (*(Left_Less_Than+1) == ‘I’
`
`&& *(Left_Less_Than+2) =
`&& *(Left_Less_Than+3) == ‘_’)
`
`{R
`
`ight_Greater_Than = strstr (Left_Less_Than, “——>”);
`if (Right_Greater Than == NULL)
`
`{R
`
`ight_Greater_Than
`= (char *)rnernchr(Left_Less_Than, ‘>’, (THE_END - Left_Less_Than));
`if (Right_Greater_Than == NULL)
`
`{d
`
`ecrernent_Val = (THE END - Left Less Than);
`if (decrernent_Val > 0)
`
`{i
`
`f (Full_Tag)
`free (Full_Tag);
`if (Base_URL)
`free (Base_URL);
`if (irngURL_struct)
`free (irngURL_struct);
`return 0; //RETURN
`
`} i
`
`f (Full_Tag)
`free (Full_Tag);
`if (Base_URL)
`free(Base_URL);
`if (irngURL_struct)
`free (irngURL_struct);
`
`return decrernentfval; //RETURN
`
`}}R
`
`ight_Greater_Than +=2;
`/* Buffer is going to be assigned Right_Greater Than + 1 */
`IMGFound = NULL;
`/* This is to cause a break and a continue to the next buffer */
`
`}/
`
`///////////////
`else // Its not a comment
`
`{R
`
`ight_Greater_Than
`
`000011
`
`78:
`79:
`80:
`81:
`82:
`83:
`84:
`85:
`86:
`87:
`88:
`89:
`90:
`91:
`92:
`9