'''
bing_spider.py

Copyright 2006 Andres Riancho

This file is part of w3af, w3af.sourceforge.net .

w3af is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

w3af is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with w3af; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

'''
from urllib2 import URLError

import core.controllers.outputManager as om

# options
from core.data.options.option import option
from core.data.options.optionList import optionList

from core.controllers.basePlugin.baseDiscoveryPlugin import baseDiscoveryPlugin
from core.controllers.w3afException import w3afException
from core.controllers.w3afException import w3afRunOnce
from core.controllers.misc.is_private_site import is_private_site

from core.data.searchEngines.bing import bing as bing


class bing_spider(baseDiscoveryPlugin):
    '''
    Search Bing to get a list of new URLs.

    The plugin queries Bing for "site:<domain>", requests every returned
    result and turns the responses into fuzzable requests.

    @author: Andres Riancho ( andres.riancho@gmail.com )
    '''

    def __init__(self):
        baseDiscoveryPlugin.__init__(self)

        # Flag that makes discover() do its work only on the first call.
        self._run = True

        # User variables
        # Maximum number of search results requested from Bing.
        self._resultLimit = 300

        # Internal variables
        # Fuzzable requests collected by _genFuzzableRequests().
        self._fuzzableRequests = []

    def discover(self, fuzzableRequest):
        '''
        Search Bing for the domain of the given request and fetch the results.

        @parameter fuzzableRequest: A fuzzableRequest instance that contains
                                    (among other things) the URL to test.
        @return: The list of fuzzable requests collected from the search
                 results.
        @raise w3afRunOnce: On every call after the first one.
        @raise w3afException: When the target domain is a private site.
        '''
        if not self._run:
            # This will remove the plugin from the discovery plugins to be run.
            raise w3afRunOnce()

        # I will only run this one time. All calls to bing_spider return the
        # same URLs.
        self._run = False

        bingSE = bing(self._uri_opener)
        domain = fuzzableRequest.getURL().getDomain()

        if is_private_site(domain):
            msg = 'There is no point in searching Bing for "site:'+ domain + '".'
            msg += ' Bing doesnt index private pages.'
            raise w3afException( msg )

        results = bingSE.getNResults('site:'+ domain, self._resultLimit)

        # One _genFuzzableRequests() call is scheduled per search result.
        # NOTE(review): _run_async()/_join() are inherited helpers; presumably
        # _join() blocks until all scheduled calls finished - confirm in
        # baseDiscoveryPlugin.
        for res in results:
            self._run_async(meth=self._genFuzzableRequests, args=(res.URL,))
        self._join()

        return self._fuzzableRequests

    def _genFuzzableRequests(self, url):
        '''
        GET the URL and then call _createFuzzableRequests with the response.

        Request failures are reported through the output manager and do not
        propagate; the fuzzable requests of successful responses are appended
        to self._fuzzableRequests.

        @parameter url: The URL to GET.
        @return: No value is returned.
        '''
        try:
            response = self._uri_opener.GET(url, cache=True)
        except KeyboardInterrupt:
            # Bare raise keeps the original traceback intact.
            raise
        except w3afException as w3:
            om.out.error('Exception while requesting ' + url + ' ' + str(w3))
        except URLError as url_err:
            om.out.debug('URL Error while fetching page in bing_spider, error: ' + str(url_err))
        else:
            fuzzReqs = self._createFuzzableRequests(response)
            self._fuzzableRequests.extend(fuzzReqs)

    def getOptions( self ):
        '''
        @return: A list of option objects for this plugin.
        '''
        d2 = 'Fetch the first "resultLimit" results from the Bing search'
        o2 = option('resultLimit', self._resultLimit, d2, 'integer')
        ol = optionList()
        ol.add(o2)
        return ol

    def setOptions( self, optionsMap ):
        '''
        This method sets all the options that are configured using the user
        interface generated by the framework using the result of getOptions().

        @parameter optionsMap: A mapping with the options for the plugin; the
                               'resultLimit' entry is read from it.
        @return: No value is returned.
        '''
        self._resultLimit = optionsMap['resultLimit'].getValue()

    def getPluginDeps( self ):
        '''
        @return: A list with the names of the plugins that should be run before the
        current one.
        '''
        return []

    def getLongDesc( self ):
        '''
        @return: A DETAILED description of the plugin functions and features.
        '''
        return '''
        This plugin finds new URL's in Bing search engine.

        One configurable parameter exists:
            - resultLimit

        This plugin searches Bing for : "site:domain.com", requests all search results and parses them in order
        to find new URLs.
        '''
