package AmphetaDesk::Channels;
###############################################################################
# AmphetaDesk                                           (c) 2000-2002 Disobey #
# morbus@disobey.com                      http://www.disobey.com/amphetadesk/ #
###############################################################################
# ABOUT THIS PACKAGE:                                                         #
#   This package contains all the various functions that read our different   #
#   flavors of RSS or scriptingNews. It's pretty crucial to happy operation.  #
#                                                                             #
# LIST OF ROUTINES BELOW:                                                     #
#   load_channel - loads the channel data from the passed filename/string.    #
###############################################################################
#                    "My throat hurts, and I want to cry."                    #
###############################################################################

use strict; $|++;
use AmphetaDesk::MyChannels;
use AmphetaDesk::Settings;
use AmphetaDesk::Utilities;
use File::Spec::Functions;
use XML::Simple;
require Exporter;
use vars qw( @ISA @EXPORT );
@ISA = qw( Exporter );
@EXPORT = qw( load_channel );

###############################################################################
# load_channel - loads the channel data from the passed filename/string.      #
###############################################################################
# USAGE:                                                                      #
#    $channel_object = load_channel( $channel_xml, $channel_url );            #
#                                                                             #
# NOTES:                                                                      #
#    This routine looks at $channel_xml, determines if it's a filename or     #
#    just plain xml, and returns a common data structure, regardless of the   #
#    underlying xml spec. It'll also modify all internal links to user pref.  #
#                                                                             #
#    This routine can also accept, as a second optional argument, the XML     #
#    URL of the $channel_xml. If this is passed, it's used to correlate the   #
#    $channel_xml with a "My Channels" subscription (determined by the $url). #
#    This allows us to update our subscription information with any new       #
#    data received from a good parse.                                         #
#                                                                             #
# RETURNS:                                                                    #
#    $data; the data structure of the xml in $channel_xml.                    #
###############################################################################

sub load_channel {

   my ($channel_filename, $channel_url) = @_;

   # a variable that will be loaded
   my $channel_xml; # with data, regardless
   # of whether it's a raw string or a file.

   # if the "filename" has formatting data, then assign it to our final xml.
   # we're assuming that it must be raw XML data coming in from add_url.
   # it's important that this test is FIRST, else we'll get errors when
   # we try to stat the existence of a raw string (in our other tests).
   if ($channel_filename =~ /\s|\n|\f|\r|\t/) {
      $channel_xml = $channel_filename;
      $channel_filename = "the channel xml";
   }

   # if this file exists, slurp it in.
   elsif (-e catfile(get_setting("dir_data_channels"), $channel_filename)) { 
      open (XML, catfile(get_setting("dir_data_channels"), $channel_filename))
            or note("There was an error opening $channel_filename: $!");
      local $/ = undef; $channel_xml = <XML>; close(XML);
   }

   # if it doesn't exist, then we must have no data for it,
   # so we create a dummy RSS file to return with an error <item>.
   elsif (!-e catfile(get_setting("dir_data_channels"), $channel_filename)) { 
      my ($data, $item); # now, fake an <item> with the error message.
      $item->{description}  = "There was an error loading this channel, as ";
      $item->{description} .= "AmphetaDesk could not download any data to parse. ";
      $item->{description} .= "AmphetaDesk will keep trying in the future ";
      $item->{description} .= "but the site or channel may be dead (on the other hand, ";
      $item->{description} .= "it could just be having connectivity issues). <strong>If all ";
      $item->{description} .= "the channels have failed to download</strong>, then you may ";
      $item->{description} .= "be using a proxy server (which you can set under the 'My Settings' ";
      $item->{description} .= "page), or firewall/security software which needs to be configured to ";
      $item->{description} .= "allow AmphetaDesk access to the Internet. Don't hesitate to email ";
      $item->{description} .= "<a href=\"mailto:morbus\@disobey.com\">morbus\@disobey.com</a> if ";
      $item->{description} .= "you need further help concerning this error message.";
      push (@{$data->{item}}, $item); # add it to our returned feed.
      return $data;
   }

   # add 'target's to all of our links. we should
   # probably move this to a per-feed "process user prefs".
   my $link_target = get_setting("user_link_target");
   $channel_xml =~ s/a href="([^"]+)"/a href="$1" target=\"$link_target\"/gi;
   $channel_xml =~ s/a href='([^']+)'/a href="$1" target=\"$link_target\"/gi;
   $channel_xml =~ s/a href=&quot;([^&quot;]+)&quot;/a href="$1" target=\"$link_target\"/gi;

   # workaround for a segfault on Win32. see, we need expat 1.95.2 or higher
   # to parse UTF8-BOM files, but the only way on Win32 we can get that is
   # by upgrading our perl to 5.6.1, which we can't do, since it screws up 
   # our binary runtime (no known fix for that yet). so, in this case, we
   # check for the existence of the byte order mark (0xEF 0xBB 0xBF). if
   # we see it, we remove it before we pass it onto our parser. sigh. the
   # only rss we know of for this is http://www.dublincore.org/news.rss
   if ($channel_xml =~ /^\xEF\xBB\xBF/)  # we now do this for all OS's 
        { $channel_xml =~ s/^\xEF\xBB\xBF//; } # because it's not yet perfect.

   # load the data. we don't do any XML fixing, as per
   # earlier versions of AmphetaDesk, because that's just naughty.
   my $data = eval { XMLin($channel_xml, forcearray=> [ "item" ],
                     keyattr => [ ], suppressempty=>undef ) };

   # if the feed has two or more <channel> tags, then we return now
   # with an error message, since moving further would cause crashes
   # due to our assumptions of what the tree should look like. rare.
   if (ref($data->{channel}) eq 'ARRAY') { return 0; }

   # oop!
   if ($@) {
      $@ =~ s/[\r\n\f]//g; # make sure it's on one line for logging.
      note("There was an error loading $channel_filename: $@.");

      # remove the extra info that's of no use.
      my $error = $@; $error =~ s/(at.*)at.*/$1./g;

      my $item; # now, fake an <item> with the error message.
      $item->{description}  = "There was an error parsing this channel. ";
      $item->{description} .= "The parser reported: <em>$error</em> ";
      $item->{description} .= "You may wish to contact the owner of this channel ";
      $item->{description} .= "informing them of this XML parsing error, but ";
      $item->{description} .= "AmphetaDesk will keep trying to get new updates for you.";
      push (@{$data->{item}}, $item); # add it to our returned feed.
      return $data; # and return gleefully to our parent.
   }

   # now, figure out if we need to shift things around to 
   # create a communal data structure for our templates.
   if ($channel_xml =~ /<rss.*version/i) {

      # if there are any <items> inside a <channel>, place them
      # at the root of the data structure so that we can have
      # $data->{item} and not $data->{channel}->{item} in the 
      # templates. this matches the rss 1.0 layout.
      my $items_copy; $items_copy->{item} = $data->{channel}->{item};
      undef $data->{channel}->{item}; # remove this hierarchy.
      unless (ref $items_copy->{item} ne 'ARRAY') {
        foreach my $item (@{$items_copy->{item}}) {
           push(@{$data->{item}}, $item);
        }
      }
   }
   elsif ($channel_xml =~ /<scriptingNews>/i) {

      # scriptingNews needs to have links manually inserted around
      # the matching <linetext> in the <item> itself. we do this
      # transition, and move everything to a {description} hash
      # for our templates. we also add our link targets.
      my $items_copy; $items_copy->{item} = $data->{item};
      undef $data->{item}; # remove this hierarchy.
      foreach my $item (@{$items_copy->{item}}) {
         next unless $item->{text};

         # radio has a bug in it's sN implementation, where
         # for every <item> a n+1 <item> exists with n </a> tags.
         next if $item->{text} =~ /^(<\/a>)+$/;

         # we check to see if the {link} is a HASH or not. we 
         # *could* fix this by forcearraying "link" in our 
         # XML::Simple XMLin, but then the template code would
         # be something like $item->{link}[0], and that's ugly.
         if (ref($item->{link}) eq 'HASH') {
            next unless $item->{link}->{url};
            next unless $item->{link}->{linetext};
            eval { $item->{text} =~ s/$item->{link}->{linetext}/<a href=\"$item->{link}->{url}\" 
                                     target=\"$link_target">$item->{link}->{linetext}<\/a>/i; };
         }
         elsif (ref($item->{link}) eq 'ARRAY') {
            foreach my $link (@{$item->{"link"}}) {
               eval { $item->{text} =~ s/$link->{linetext}/<a href=\"$link->{url}\" 
                                       target=\"$link_target">$link->{linetext}<\/a>/i; };
            }
         }

         my $new_item; # add it to our structure.
         $new_item->{description} = $item->{text};
         push (@{$data->{item}}, $new_item); 
      }
   }

   # if we got a URL passed, see if we can update
   # our subscription list data with the newly parsed data.
   if ($channel_url) { update_my_channel_data($channel_url, $data); }

   return $data;

}

1;
