#! /usr/bin/python

# TPG's IPTV xmltv generator
# By Jean-Yves Avenard: jean-yves@avenard.org
#
# Copyright (c) 2008-2009, JEAN-YVES AVENARD
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of the <organization> nor the
#       names of its contributors may be used to endorse or promote products
#       derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY JEAN-YVES AVENARD ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL JEAN-YVES AVENARD BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Derived from scraper.py by Michael Foord
# Released subject to the BSD License
# Please see http://www.voidspace.org.uk/documents/BSD-LICENSE.txt
# Scripts maintained at http://www.voidspace.org.uk/python/index.shtml
#
# Big thanks to Stephen Dredge from TPG for his ongoing support and help
#

# Grabber identity strings. The usage text below lists --version,
# --description, --preferredmethod and --capabilities options; presumably these
# constants are what those options print (the option handling is not in this
# part of the file -- confirm).
# NAME is also used by Config to build the default "<NAME>.conf" file name.
NAME = "tv_grab_au_tpg"
VERSION = '0.4.10'
DESCRIPTION = "Australia (TPG's IPTV from various sources)"
PREFERREDMETHOD = 'allatonce'
# One capability per line.
CAPABILITIES = """baseline
manualconfig
preferredmethod"""

__doc__ = """Export TPG's IPTV channel information

This grabber is distributed under the BSD License.

Usage: tv_grab_au_tpg [options]

Options:
    -h / --help
        Print this message and exit.

    --version
        Print the version

    --quiet
        Suppress all progress information (but there are none anyway).

    --verbose
        Display additional information (for debugging only)

    --configure
        Does nothing, purely provided to work with mythtv

    -d <N> / --days <N>
        Grab N days.  Default is to grab as many days of data as is available
        on the source.

    --offset <N>
        Start grabbing N days in the future.  Defaults to 0; starting grabbing
        with today's data.

    -o <filename> / --output <filename>
        By default, the output is directed to stdout. Write standard output to
        filename instead.

    --capabilities
        List XMLTV capabilities.
    
    --pretty
        Output XML in pretty/readable mode.

    --preferredmethod
        Tell the calling program that we prefer to return all the data at once.
    
    --description
        Tell everyone that we're an Australian grabber.

"""

  
import sys
import os
import re
import string
import StringIO
from xml.dom.minidom import Document
import xml.dom.minidom
import getopt
import datetime
import pytz
import copy
import htmlentitydefs

#For opening the URL
import socket
import urllib2
import urllib
import cookielib

#Define which channels are active
#grab_channel = {
#				'tv5' : False, 'aljazeera': False, 'tve': False , 'dw': False, 'bvn': False,
#				'duna': False, 'trtint': False, 'cuba' : False, 'sctv': False, 'newsasia' : False,
#				'rt' : False, 'ertworld' : False, 'rtm': False, 'voa': False, 'eurosport': False, 'eurosportnews': False,
#				'bloomberg': True, 'tpg': False, 'thaitv5': False, 'vtv4' : False, 'tvri' : False, 'ddsport' : False,
#				'worldfashion' : False, 'cctv': False, 'tpgnasa': False
#				}

# Channel switch table: channel id -> True to grab it, False to skip it.
# Most keys equal the matching *idname setting further down (e.g. 'tv5',
# 'rt', 'worldfashion'); 'tpg', 'tpgnasa' and 'cctv' have no idname of their own.
grab_channel = {
				'tv5' : True, 'aljazeera': True, 'tve': True , 'dw': True, 'bvn': True,
				'duna': True, 'trtint': True, 'cuba' : True, 'sctv': True, 'newsasia' : True,
				'rt' : True, 'ertworld' : True, 'rtm': True, 'voa': True,'eurosport': True, 'eurosportnews': True,
				'bloomberg': True, 'tpg': True, 'thaitv5' : True, 'vtv4': True, 'tvri' : True,
				'worldfashion': True, 'cctv': True, 'tpgnasa': True
				}

# Per-state settings; the keys are the allowed 'TimeZone' answers of item_config.
# NOTE(review): the three numbers look like [UTC offset written as HHMM,
# daylight-saving flag, zone id for TV5 (VIC's 240 equals cookietv5's cid_zone)]
# -- confirm against the code that reads them.
time_zone = { 'SA' : [930,1,238] , 'WA' : [800,1,241], 'NT' : [930,0,336], 'ACT' : [1000,1,15], 'VIC' : [1000,1,240], 'NSW' : [1000,1,235], 'QLD' : [1000,0,237], 'TAS' : [1000,1,239] }

#TV5 global settings
tv5url = 'http://www.tv5.org/TV5Site/programmes/universal/pop_print.php?descr=1'
cookietv5 = { 'cid_signal':'4', 'cid_zone': '240', 'c_id_signal':'4', 'c_id_version':'1', 'c_id_zone':'240' , 'c_id_trad': '3' }
tv5channelname = 'TV5'
tv5idname = 'tv5'
maxtv5cookie = 1269604634	#Valid until somewhere in 2010
tv5mainurl = 'www.tv5.org'
# Match table in the format the Scraper class consumes (presumably passed as its
# first constructor argument -- the call is not in this part of the file).
# Each row is one pattern, terminated by -1:
#   * a plain entry is a regular expression matched against the lower-cased
#     tag name (see Scraper.handletag);
#   * an '_name_' entry consumes the text found at that point, which is
#     recorded under 'name' (see Scraper.pdata).
# The other state* tables in this file follow the same layout.
statetv5 = [
	['ul','li','_date_','ul','li','_read_',-1],
	['li','strong','_time_','a','_title_','em','_category_',-1],
	['li','strong','_time_','a','_title_','em','_category_','span','p','_desc_',-1],
	['li','strong','_time_','a','_title_','em','_category_','span','p','span','_subtitle_','_desc_',-1],
	['li','strong','_time_','a','_title_','em','_category_','span','p','span','_subtitle_',-1],
	['li','strong','_time_','a','_title_','em','_category_','img','span','p','_desc_',-1],
	['li','strong','_time_','a','_title_','em','_category_','img','span','p','span','_subtitle_','_desc_',-1],
	['li','strong','_time_','a','_title_','em','_category_','img','img','span','p','_desc_',-1],
	['li','strong','_time_','a','_title_','em','_category_','img','img','span','p','span','_subtitle_','_desc_',-1],
	]

#Al Jazeera settings
ajurl = 'http://xmltv.radiotimes.com/xmltv/2055.dat'
ajchannelname = 'Al Jazeera'
ajidname = 'aljazeera'

#TVE International settings
tveurl = 'http://www.rtve.es/tve/programo/avan3/tv3s'
tvechannelname = 'TVE International'
tveidname = 'tve'
statetve = [
	['strong','u','_date_','_title_',-1],
	['strong','u','_date_','_title_','_desc_',-1],
	['strong','_title_',-1],
	['strong','_title_','_desc_',-1],
	['strong','_title_','strong','_subtitle_',-1],
	['strong','_title_','strong','_subtitle_','_desc_',-1]]


#lyngsat format: 0: id, 1: url, 2: name, 3: language, 4: timezone , 5: dst
lyngsat = [
#	[ 'dw', 'http://www.lyngsat-guide.com/DW-TV.html', 'DW-TV', 'de', 200, 0 ],
#	[ 'bvn', 'http://www.lyngsat-guide.com/BVN-TV.html', 'Dutch TV', 'nl', 200, 0 ],
#	[ 'cctv4', 'http://www.lyngsat-guide.com/CCTV-4.html', 'Chinese TV 1' ,'cn', 200, 0 ],
#	[ 'duna', 'http://www.lyngsat-guide.com/Duna-TV.html', 'Hungarian TV' ,'hu', 200, 0 ],
#	[ 'trtint', 'http://www.lyngsat-guide.com/TRT-International.html', 'TRT International' ,'tr', 200, 0 ],
	]

statelyngsat = [
	['tr','td','td','font808080','b','_time_','td','font808080','b','_title_','font808080','_desc_',-1],
	['tr','td','td','fontblack','b','_time_','td','fontblack','b','_title_','fontblack','_desc_',-1],
	['tr','td','td','fontblack','b','_time_','td','fontblack','b','_title_','img','fontblack','_desc_',-1],
	]

#Cubavision settings
cubaurl = 'http://www.cubavision.cubaweb.cu/satelite.asp'
cubachannelname = 'Cuba Vision'
cubaidname = 'cuba'
statecuba = [
	['table','tr','td','_read_', 'td','_time_','td','_read_','td','_title_', 'td', -1],
	]

#SCTV settings
sctvurl = 'http://www.communitytv.com.au/p3.htm'
sctvchannelname = 'SCTV'
sctvidname = 'sctv'
statesctv = [
	['strong','_date_',-1],
	['br','_title_',-1],
	['span','_title_',-1],
	]

#Channel NewsAsia
newsasiaurl = 'http://www.mediacorpsingapore.com/tvguide/cnasing_list_daily.asp'
newsasiachannelname = 'Channel NewsAsia'
newsasiaidname = 'newsasia'
statenewsasia = [
	[ 'p','font','_date_','table',-1],
	[ 'tr','td','font','_time_','td','font','_title_','font','_subtitle_','font','i','_desc_',-1],
	[ 'tr','td','font','_time_','td','font','_title_','font','_subtitle_',-1],
	]

#Russia Today
russiaurl = 'http://www.russiatoday.ru/schedule/date'
russiachannelname = 'Russia Today'
russiaidname = 'rt'
staterussia = [
	['_time_','br',-1],
	['_time_','a','_desc_','br',-1],
	]

#Greek TV
ertworldurl = 'http://tvradio.ert.gr/en/worldprogram.asp'
ertworldchannelname = 'Greek TV'
ertworldidname = 'ertworld'
stateertworld = [
	[ 'tr','td','td','div','_time_', 'br',-1 ],
	[ 'br','_time_', 'br',-1 ],
	]
	
#Malaysian TV
rtmurl = 'http://www.rtm.net.my/tvschedule/bi/schedule1.php'
rtmchannelname = 'Malaysian TV'
rtmidname = 'rtm'
statertm = [
	[ 'tr', 'td', 'span', '_date_', -1 ],
	[ 'tr', 'td', 'font333333','span','_time_','td','span','_title_', -1 ],
	]

#Voice of America
voaurl = 'http://ibb7-2.ibb.gov/tvschedule/getlisting.cfm'
voachannelname = 'Voice of America'
voaidname = 'voa'
statevoa = [
	[ 'tr', 'td', 'b', '_data_', '_data2_', -1 ],
	[ 'tr', 'td', 'b', '_data_', '_data2_', 'b', '_data_', '_data2_', -1 ],
	[ 'tr', 'td', 'b', '_title_', 'a', '_title2_', -1 ],
	[ 'tr', 'td', 'b', '_title_', 'a', '_title2_', '_title3_', -1 ],
	]

#Classic, Playboy and Adult One
tpgurl = 'http://tpg.com.au/iptv/guide_playboy_adult_classic.xmltv'
tpgnasaurl = 'http://tpg.com.au/iptv/guide_nasatv.xmltv'

#Eurosport
eurosporturl = 'http://yahoo.eurosport.com/tvschedule_clng0'
eurosportchannelname = 'Eurosport'
eurosportidname = 'eurosport'
stateeurosport = [
	[ 'table', 'tr', 'td', 'div', '_ignore_', 'div', 'span', '_ignore_', -1],
	[ 'td', 'div', '_time_', 'div', 'span', '_title_', 'div', 'p', 'a', '_desc_', -1],
	]

#Eurosport News
eurosportnewschannelname = 'Eurosport News'
eurosportnewsidname = 'eurosportnews'

#Bloomberg
bloombergurl = 'http://www.bloomberg.com/tvradio/tv/schedule_asia'
bloombergchannelname = 'Bloomberg Television'
bloombergidname = 'bloomberg'
statebloomberg = [
	[ 'table', 'tr', 'td', 'span', '_ignoretime_', 'td', 'span', '_ignoretitle_', 'td', 'span', '_ignoredesc_', -1 ],
	[ 'tr', 'td', 'span', '_time_', 'td', 'span', '_title_', 'td', 'span', '_desc_', -1 ], 
	]

#Thai TV5
thaitv5url = 'http://www.tv5.co.th/programs/day.php'
thaitv5channelname = 'Thai TV5'
thaitv5idname = 'thaitv5'
statethaitv5 = [
	[ 'aentry', '_time_' , -1],
	]

#VTV 4:
vtv4url = 'http://www.vtv.vn/VN/TrangChu/LichPhatSong/VTV4'
vtv4channelname = 'VTV4'
vtv4idname = 'vtv4'
statevtv4 = [
	[ 'divschedule' , 'divtime' , '_time_' , 'divprogram', '_title_', 'divcontent', 'script', '_ignore_', '_desc_', -1 ]
	]

#TVRI
tvriurl = 'http://www.tvri.co.id'
tvrichannelname = 'Indonesian TV'
tvriidname = 'tvri'
statetvri = [
	[ 'tr', 'td', 'div',  '_time_', 'td', 'td', 'a', '_title_', -1 ],
	]

#World Fashion
wofurl = 'http://www.wfc.tv/pl/'
wofchannelname = 'World Of Fashion'
wofidname = 'worldfashion'
statewof = [
	[ 'tr', 'td', 'span', '_time_',  '_ignore_', 'td', '_ignore_', 'td', 'a', '_title_', -1 ],
	]

#CCTV
cctvchannels = [
	{ 'id': 'cctv4', 'name': 'Chinese TV1', 'number': 5 },
	{ 'id': 'cctv9', 'name': 'Chinese TV2', 'number': 12 },
	{ 'id': 'cctv1', 'name': 'Chinese TV3', 'number': 2 },
	{ 'id': 'cctv-music', 'name': 'Chinese TV4', 'number': 18 },
	{ 'id': 'cctv11', 'name': 'Chinese TV5', 'number': 14 },
	{ 'id': 'cctv2', 'name': 'Chinese TV6', 'number': 3 },
	]

cctvurl= 'http://tv.cctv.com/soushi/05'
googletranslatechinese = 'http://209.85.171.104/translate_c?hl=en&langpair=zh|en&u='
statecctv = [
	[ 'spantime', '_time_', 'div', 'span', 'spangoogle-src-text', 'a', 'spantitle', '_desc_', 'a', 'spantitle', '_title_', -1 ],
	]

#DW
dwurl = 'http://www9.dw-world.de/regionalisierung/programm.php'
dwchannelname = 'DW-TV'
dwidname = 'dw'
statedw = [
	[ 'tdtimecellday[0-9]', '_time_', 'tdbcasttitle', 'span', 'a', '_title_', 'spanbcastdesc', '_desc_', -1 ],
	]

#TRT
trturl = 'http://www.trt.net.tr/TV/TvAkis.aspx'
trtchannelname = 'TRT International'
trtidname = 'trtint'
statetrt = [
	[ 'tr', 'tdprogramsaat', '_time_', 'tdprogramad', '_title_', -1 ],
	[ 'tr', 'tdprogramsaat', '_time_', 'tdprogramad', 'a', '_title_', -1 ],
	]

#BVN
bvnurl = 'http://www.bvn.nl/tvgids/tvgids'
bvnchannelname = 'Dutch TV'
bvnidname = 'bvn'

#Duna
dunaurl = 'http://www.dunatv.hu/felsomenu/musorujsag'
dunachannelname = 'Hungarian TV'
dunaidname = 'duna'
stateduna = [
	[ 'divmusortime' , '_time_', -1 ],
	[ 'divmusortitle', '_title_', -1 ],
	[ 'divmusorsubtitle', '_subtitle_', -1],
	[ 'divmusorgenre', '_desc_', -1 ],
	[ 'divmusorsynops', '_desc_', -1 ],
	[ 'divmusorkozrem', '_desc_', -1 ],
	[ 'divmusorepisodnum', '_desc_', -1 ],
	]

#default setting values
# NOTE(review): maxdays and day_offset presumably back the --days / --offset
# options described in the usage text -- confirm in the option parsing code.
maxdays = 7
day_offset = 0
# Directory holding the default config file; Config expands the '~'.
default_config_dir = "~/.xmltv"
config_file = ""
# Debug level: Scraper prints tag/content traces above 1 and state-machine traces above 2.
verbose = 0
# Interactive configuration items: name -> [tuple of allowed answers, default answer]
# (consumed by Config.initconfig).
item_config = { 'TimeZone' : [('SA','WA','NT','ACT','VIC','NSW','QLD','TAS'),'VIC'] }
# NOTE(review): presumably the months in which daylight saving ends/starts -- confirm.
monthwinter = 3
monthsummer = 10
# Same HHMM-like form as the first number of each time_zone entry (VIC is 1000).
localtimezone = 1000
dst = 0
# NOTE(review): presumably prepended to channel ids in the generated XMLTV -- confirm.
idprefix = 'tpg.'
converttz = False

#namefind is supposed to match a tag name and attributes into groups 1 and 2 respectively.
#the original version of this pattern:
# namefind = re.compile(r'(\S*)\s*(.+)', re.DOTALL)
#insists that there must be attributes and if necessary will steal the last character
#of the tag name to make it so. this is annoying, so let us try:
namefind = re.compile(r'(\S+)\s*(.*)', re.DOTALL)

attrfind = re.compile(
	r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
	r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?')			# this is taken from sgmllib

def usage():
	"""Print the command-line help text (this module's ``__doc__``) on stdout."""
	print(__doc__)

class Error(Exception):
	"""Grabber failure carrying a human-readable message and a numeric code.

	Code ranges:
	1xx : configuration error
	2xx : url fetching error
	3xx : content issue
	4xx : post-processed issue
	"""

	def __init__(self, message,code):
		"""Store message and code, and hand both to the base Exception.

		Initialising the base class fills in ``args``, so repr(), copying and
		pickling of the exception keep the message and code instead of
		seeing an empty argument tuple.
		"""
		Exception.__init__(self, message, code)
		self.message = message
		self.code = code

	def __str__(self):
		# repr() is kept on purpose: the printed form stays quoted as before.
		return repr(self.message)

class Config(object):
	"""Locate, read, interactively build and write the grabber's config file.

	The file holds one ``key:value`` setting per line.

	Attributes:
	config      -- dict of settings read from, or to be written to, the file
	config_file -- path of the configuration file in use
	exists      -- only set when constructed with a false ``init``: whether the
	               file could be opened
	"""

	def __init__(self, config_file, init):
		"""Work out the config file location and, unless initialising, load it.

		config_file -- explicit path ('~' is expanded); when empty the file is
		               <default_config_dir>/<NAME>.conf
		init        -- true when the configuration is about to be (re)created:
		               the default directory is created if missing and nothing
		               is read

		Raises Error(100) when the config directory cannot be created and
		Error(101) when an existing file contains a malformed line.
		"""
		self.config = {}
		if not config_file:
			config_dir = os.path.expanduser(default_config_dir)
			# Create config directory if it doesn't exist
			if init and not os.path.isdir(config_dir):
				try:
					os.mkdir(config_dir)
				except OSError:
					# Only filesystem failures are translated; anything else
					# (e.g. KeyboardInterrupt) is left to propagate.
					raise Error('Failed to create config directory: %s' % config_dir, 100)
			self.config_file = os.path.join(config_dir, '%s.conf' % NAME)
		else:
			self.config_file = os.path.expanduser(config_file)

		if init:
			return

		try:
			f = open(self.config_file)
		except IOError:
			self.exists = False
			return

		self.exists = True
		# match() anchors at the start of the line; "$" also matches just before
		# the trailing newline.
		line_re = re.compile(r"([a-zA-Z0-9\s]+):([a-zA-Z0-9]+)$")
		# try/finally (rather than "with") closes the file even when a bad line
		# aborts the load, and works on every Python 2 release.
		try:
			for line in f:
				m = line_re.match(line)
				if not m:
					raise Error("Invalid config file, run with --configure again",101)
				self.config[m.group(1)] = m.group(2)
		finally:
			f.close()

	def initconfig(self):
		"""Prompt on stdin for every item of item_config and keep the answers.

		An empty reply selects the item's default; any other reply must be one
		of the allowed values, otherwise 'Incorrect Answer' is written to stderr
		and the question is asked again. The answers replace self.config.
		"""
		answer = {}
		for x in item_config:
			choices, default = item_config[x][0], item_config[x][1]
			# Every choice is followed by a comma, including the last one.
			option = ''.join([i + ',' for i in choices])
			while True:
				reply = raw_input('%s - %s: (%s)\t:' % ( x , option, default) )
				if reply == '':
					#Set default answer
					reply = default
					break
				if reply in choices:
					break
				sys.stderr.write('Incorrect Answer\n')
			answer[x] = reply
		self.config = answer

	def write(self):
		"""Write self.config to self.config_file, one ``key:value`` per line.

		Raises Error(102) when the file cannot be opened for writing.
		"""
		try:
			f = open(self.config_file, 'w')
		except IOError:
			raise Error("Couldn't write config file",102)
		# Close the file even if one of the writes fails.
		try:
			for x in self.config:
				f.write('%s:%s\n' % (x , self.config[x]))
		finally:
			f.close()

class Scraper:
	
	def __init__(self, arg1, completefont=True, completeclass=False):
		"""Initialise a parser."""
		self.buffer = ''
		self.outfile = ''
		self.processed = []
		self.matchtable = arg1
		#Initialise state table.
		self.workingtable = []
		self.statetable = []
		self.htmlpos = 0
		self.completedblock = []
		self.completefont = completefont
		self.completeclass = completeclass

	def reset(self):
		"""This method clears the input buffer and the output buffer."""
		self.buffer = ''
		self.outfile = ''
		self.processed = []
		self.matchtable = []
		self.workingtable = []
		self.statetable = []

	def push(self):
		"""This returns all currently processed data and empties the output buffer."""
		data = self.outfile
		self.outfile = ''
		return data

	def close(self):
		"""Returns any unprocessed data (without processing it) and resets the parser.
		Should be used after all the data has been handled using feed and then collected with push.
		This returns any trailing data that can't be processed.

		If you are processing everything in one go you can safely use this method to return everything.
		"""
		data = self.push() + self.buffer
		self.buffer = ''
		self.processed = []
		self.matchtable = []
		self.workingtable = []
		self.statetable = []
		return data

	def feed(self, data,ignoretag=None, emptyfield=True, replacecr=False, replacecrval=' '):
		"""Pass more data into the parser.
		As much as possible is processed - but nothing is returned from this method.
		"""
		self.index = -1
		self.tempindex = 0
		self.buffer = self.buffer + data
		self.emptyfield = emptyfield
		self.replacecr = replacecr
		self.replacecrval = replacecrval

		outlist = []
		thischunk = []
		while self.index < len(self.buffer)-1:		  # rewrite with a list of all the occurences of '<' and jump between them, much faster than character by character - which is fast enough to be fair...
			self.index += 1
			inchar = self.buffer[self.index]
			if inchar == '<':
				ok, result, attrs, thetag = self.tagstart(ignoretag)
				#If tag is to be ignored, replace it with a '\n'
				if ok and ignoretag and re.compile(ignoretag).match(result.lower()):
					thischunk.append('\n')
				else:
					outlist.append(self.pdata(''.join(thischunk)))
					thischunk = []
					if ok: result = self.handletag(result, attrs, thetag)
					if result: outlist.append(result)
					if self.tempindex: break
			else:
				thischunk.append(inchar) 
		if self.tempindex:
			self.buffer = self.buffer[self.tempindex:]
		else:
			self.buffer = ''
			if thischunk: self.buffer = ''.join(thischunk)
		self.outfile = self.outfile + ''.join(outlist)

	def tagstart(self,ignoretag):
		"""We have reached the start of a tag.
		self.buffer is the data
		self.index is the point we have reached.
		This function should extract the tag name and all attributes - and then handle them !."""
		test1 = self.buffer.find('>', self.index+1)
		test2 = self.buffer.find('<', self.index+1)		 # will only happen for broken tags with a missing '>'
		test1 += 1
		test2 += 1
		if not test2 and not test1:					 
			self.tempindex = self.index				  # if we get this far the buffer is incomplete (the tag doesn't close yet)
			self.index = len(self.buffer)			   # this signals to feed that some of the buffer needs saving
			return False, False, 0, 0
		if test1 and test2:
			test = min(test1, test2)
			if test == test2:		   # if the closing tag is missing and we're working from the next starting tag - we eed to be careful with our index position...
				mod=1
			else:
				mod=0
		else:
			test = test1 or test2
			if test2:
				mod=1
			else:
				mod=0
		thetag = self.buffer[self.index+1:test-1].strip()

		if mod:				   # as soon as we return, the index will have 1 added to it straight away
			self.index = test -2
		else:
			self.index = test -1
			
		if thetag.startswith('!'):			   # is a declaration or comment
			return False, self.pdecl(thetag), 0, 0
		if thetag.startswith('?'):
			return False, self.ppi(thetag), 0, 0		  # is a processing instruction 

		if thetag.startswith('/') and not (ignoretag and re.compile(ignoretag).match(thetag.lower()[1:])):
			return False, self.endtag(thetag), 0, 0			  # is an endtag 
		#Remove leading / as this tag is going to be ignored anyway
		if thetag.startswith('/'):
			thetag = thetag[1:]
		nt = namefind.match(thetag)
		if not nt: return False, self.emptytag(thetag), 0, 0							  # nothing inside the tag ?
		name, attributes = nt.group(1,2)

		matchlist = attrfind.findall(attributes)
		attrs = []
		#the doc says a tag must be nameless to be "empty" so kill
		#next line that calls any tag with no attributes "empty"
		#if not matchlist: return self.emptytag(thetag)							  # nothing inside the tag ?
		for entry in matchlist:
			attrname, rest, attrvalue = entry			   # this little chunk nicked from sgmllib - except findall is used to match all the attributes
			if not rest:
				attrvalue = attrname
			elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
				 attrvalue[:1] == '"' == attrvalue[-1:]:
				attrvalue = attrvalue[1:-1]
			attrs.append((attrname.lower(), attrvalue))
		n = name.lower()
		return True, name.lower(), attrs, thetag			  # deal with what we've found.

	def checkifcompleted(self,state,i,completedblock):
		matchtable = self.matchtable
		#Check if we have reached the end of the table, if so, add the completed block to the completed list
		if matchtable[state[i]['linematch']][state[i]['position']] == -1:
			#Remove all previously completed block starting at or after the newly completed block
			for j in reversed(range(len(completedblock))):
				if completedblock[j]['begin'] >= state[i]['begin']:
					if verbose > 2:
						print 'deleted completed block %d' % j
					del completedblock[j]
			#Add as completed block
			completedblock.append({})
			completedblock[-1]['end'] = self.htmlpos
			completedblock[-1]['begin'] = state[i]['begin']
			completedblock[-1]['content'] = state[i]['content']
			if verbose > 2:
				print 'Add completed block %d and delete state %d (match[%d])' % (len(completedblock),i,state[i]['linematch'])
			#We have completed this match, delete it
			del state[i]		
	
	def scancompleted(self,state,completedblock):
		#Scan the completed blocks, if any current state start strictly before this completed block finished, delete the state
		#if all current states start stricly after when the block started: accept block
		for i in reversed(range(len(completedblock))):
			accepted = True
			for j in reversed(range(len(state))):
				if state[j]['begin'] > completedblock[i]['begin'] and state[j]['begin'] < completedblock[i]['end']:
					if verbose > 2:
						print "delete state %d (match[%d]) content=%s" % (j , state[j]['linematch'], state[j]['content'])
					del state[j]
				elif state[j]['begin'] <= completedblock[i]['begin']:
					accepted = False
			if accepted == True:
				self.processed += completedblock[i]['content']
				if verbose > 2:
					print 'Validate completedblock %d and add %s' % (i , completedblock[i]['content'])
				del completedblock[i]

################################################################################################
	# The following methods are called to handle the various HTML elements.
	# They are intended to be overridden in subclasses.

	def pdata(self, inchunk):
		"""Feed the text found between two tags into the match state machine.

		feed() calls this with all the text collected since the previous tag.

		Non-blank text:
		* a new state is started for every match row whose first entry is a
		  '_field_' placeholder;
		* every state that currently expects a placeholder records the field
		  name followed by the (cleaned) text and advances; every other state
		  is deleted;
		* completed blocks are validated and self.htmlpos is incremented once.

		Blank text is only looked at when self.emptyfield is true and states
		exist: each state expecting a placeholder records the field name with an
		empty value. States expecting a tag are left untouched in that case.

		Returns the text, including any character replacements applied to it.
		Meant to be overridable in subclasses.
		"""
		state = self.statetable
		matchtable = self.matchtable
		completedblock = self.completedblock
		if not re.compile(r"\s*$").match(inchunk):			#Ignore empty lines
			if verbose > 1:
				print 'new content:', inchunk

			# A placeholder entry looks like _name_; group 1 is the field name.
			p = re.compile(r"_([a-z0-9]+)_")
			#Start new entry in statetable if it expects some content
			for i in range(len(matchtable)):
				if p.match(matchtable[i][0]):
					state.append({})
					state[-1]['linematch'] = i
					state[-1]['begin'] = self.htmlpos
					state[-1]['position'] = 0
					state[-1]['content'] = []
					if verbose > 2:
						print 'creating new state %d starting at position %d for matchline %d: ' % (len(state),self.htmlpos, i)

			if len(state) > 0:
				# Walk backwards so that deleting state[i] does not shift the
				# entries still to be visited.
				for i in reversed(range(len(state))):
					m = p.match(matchtable[state[i]['linematch']][state[i]['position']])
					if m:
						state[i]['content'].append(m.group(1))
						#Remove some special characters
						if self.replacecr:
							inchunk = string.replace(inchunk,'\r',self.replacecrval)
							inchunk = string.replace(inchunk,'\n',self.replacecrval)
						inchunk = string.replace(inchunk,'\r','\n')
						#remove html space character
						# ('\xc2\xa0' is the UTF-8 encoding of U+00A0, no-break space)
						inchunk = string.replace(inchunk, '\xc2\xa0', ' ')
						state[i]['content'].append(inchunk)
						state[i]['position'] += 1
						if verbose > 2:
							print 'progressing state %d (match[%d]), now in position %d' % (i,state[i]['linematch'],state[i]['position'])
							print "state[%d]['content']=%s" % (i,state[i]['content'])
						self.checkifcompleted(state, i, completedblock)
					else:
						# This state expected a tag here, not text.
						if verbose > 2:
							print 'delete state %d (match[%d]), it has failed to progress at position %d' % (i,state[i]['linematch'],state[i]['position'])
						del state[i]
			self.scancompleted(state,completedblock)
			self.htmlpos += 1
		else:
			#Handle the case where content is empty and we were expecting a field.
			if self.emptyfield and (len(state) > 0):
				# NOTE(review): scancompleted() below can delete states other than
				# state[i] while this loop still walks indices computed up front;
				# that looks like it could raise IndexError -- confirm.
				for i in reversed(range(len(state))):
					m = re.compile(r"_([a-z0-9]+)_").match(matchtable[state[i]['linematch']][state[i]['position']])
					if m:
						if verbose > 1:
							print 'empty new content when we were expecting a field to fill', inchunk
						state[i]['content'].append(m.group(1))
						#Remove some special characters
						inchunk = string.replace(inchunk,'\r','')
						inchunk = string.replace(inchunk,'\n','')
						state[i]['content'].append(inchunk)
						state[i]['position'] += 1
						if verbose > 2:
							print 'progressing state %d (match[%d]), now in position %d' % (i,state[i]['linematch'],state[i]['position'])
							print "state[%d]['content']=%s" % (i,state[i]['content'])
						self.checkifcompleted(state, i, completedblock)
						self.scancompleted(state,completedblock)
						# Incremented once per state that consumed the empty field.
						self.htmlpos += 1
		return inchunk

	def pdecl(self, thetag):
		"""Called when we encounter the *start* of a declaration or comment. <!....
		It uses self.index.
		Dummy method to override. Just returns."""
		if verbose > 1:
			print 'comment <!' + thetag
		return '<'
	
	def ppi(self,thetag):
		"""Called when we encounter the *start* of a processing instruction. <?....
		It uses self.index.
		Dummy method to override. Just returns."""
		if verbose > 1:
			print 'processing <?' + thetag 
		return '<'

	def endtag(self, thetag):
		"""Called when we encounter a close tag. </....
		It is passed the tag contents (including leading '/') and just returns it."""
		if verbose > 1:
			print 'end tag: ' + thetag
		return '<' + thetag + '>'

	def emptytag(self, thetag):
		"""Called when we encounter a tag that we can't extract any valid name or attributes from.
		It is passed the tag contents and just returns it."""
		return '<' + thetag + '>'  

	def handletag(self, name, attrs, thetag):
		"""Called when we encounter a tag.
		Is passed the tag name and a list of (attrname, attrvalue) - and the original tag contents as a string."""
		state = self.statetable
		matchtable = self.matchtable
		completedblock = self.completedblock

		if self.completefont and name == "font":
			m = re.compile("""color=['"]?#?([0-9a-f]+|[a-zA-Z]+)['"]?(\s|$)""").search(thetag.lower())
			if m:
				name =  name + m.group(1)

		if self.completeclass:
			m = re.compile("""class=['"]?(\S+?)['"]?(\s|$)""").search(thetag.lower())
			if m:
				name =  name + m.group(1)

		if verbose > 1:
			print 'new tag', name
		#Start new entry in statetable if it matches the first entry in the respective match table
		for i in range(len(matchtable)):
			if re.compile(matchtable[i][0]).match(name):
				state.append({})
				state[-1]['linematch'] = i
				state[-1]['begin'] = self.htmlpos
				state[-1]['position'] = 0
				state[-1]['content'] = []
				if verbose > 2:
					print 'creating new state %d starting at position %d for matchline %d: ' % (len(state),self.htmlpos, i)

		#advance all pointers in the statetable, or remove the state if it stopped matching
		for i in reversed(range(len(state))):
			if re.compile(matchtable[state[i]['linematch']][state[i]['position']]).match(name):
				state[i]['position'] += 1
				if verbose > 2:
					print 'progressing state %d (match[%d]), now in position %d' % (i,state[i]['linematch'],state[i]['position'])
				self.checkifcompleted(state, i, completedblock)
			else:
				if verbose > 2:
					print 'delete state %d (match[%d]), it has failed to progress at position %d' % (i,state[i]['linematch'],state[i]['position'])
				del state[i]

		self.scancompleted(state,completedblock)
		self.htmlpos += 1

		return '<' + thetag + '>'

class Web:
	"""Minimal wrapper around a urllib2 response opened with a browser User-agent."""

	def __init__(self, url, timeout=30, data=None):
		"""Open url and keep the response in self.handle.

		url     -- address to fetch
		timeout -- seconds, installed as the process-wide default socket timeout
		data    -- optional mapping, url-encoded and sent as the request data

		Raises Error(200) with a descriptive message when the URL cannot be opened.
		"""
		self.handle = None

		txdata = None
		if data:
			txdata = urllib.urlencode(data)
		txheaders =  {'User-agent' : 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'}

		# timeout in seconds
		socket.setdefaulttimeout(timeout)

		try:
			# build the request object and open it
			self.handle = urllib2.urlopen(urllib2.Request(url, txdata, txheaders))
		except IOError:
			# Fetch the exception this way so the clause parses identically on
			# every Python release.
			e = sys.exc_info()[1]
			parts = ['We failed to open "%s".' % url]
			if hasattr(e, 'code'):
				parts.append('\nWe failed with error code - %s.' % e.code)
			elif hasattr(e, 'reason'):
				parts.append("\nThe error object has the following 'reason' attribute :\n")
				parts.append(repr(e.reason))
				parts.append("\nThis usually means the server doesn't exist, is down, or we don't have an internet connection.")
			raise Error(''.join(parts),200)

	def read(self):
		"""Return the data read from the response."""
		return self.handle.read()

	def close(self):
		"""Close the response."""
		return self.handle.close()

	def readline(self):
		"""Return the next line of the response."""
		return self.handle.readline()

class XMLTV:
	def __init__(self):
		self.doc , self.tv_xml = self.startdoc()

	def startdoc(self):
		disclaimer= """
	Important Disclaimer:
	All data provided is the copyright of the original provider we're fetching the source from.
	You are required to check with each provider the acceptable terms of use and the various legal obligations you must adhere to.
	The data is usually restricted to personal use only.
	The author of this xmltv file makes no guarantee of any kind nor does he endorse the content.
	The author explicitely distances himself from the content provided by this xmltv file

	Information collected from:
	Al Jazeera: www.radiotimes.com
	Bloomberg: http://www.bloomberg.com
	Channel NewsAsia: http://www.channelnewsasia.com
	World of Fashion: http://www.worldfashion.tv
	Eurosport: http://yahoo.eurosport.com
	DW: http://www.dw-world.de
	BVN: http://www.bvn.nl
	Duna : http://www.dunatv.hu
	TRT International: http://www.trt.net.tr
	TVE: www.rtve.es
	CCTV channels: http://www.cctv.com
	Cuba Vision: http://www.cubavision.cubaweb.cu
	ERT World: http://tvradio.ert.gr
	TVRI: http://www.tvri.co.id
	RTM (Malaysia): http://www.rtm.net.my
	Russia Today: http://www.russiatoday.ru
	SCTV: http://www.communitytv.com.au
	TV5 : www.tv5.org
	Voice of America: http://www.ibb.gov
	VTV4: http://www.vtv.vn
	Thai TV5: http://www.tv5.co.th
	NASA TV, Classic, Playboy and Adult One: From TPG xmltv feed

	If you notice errors, missing channels or discrepancies, thank you for contacting the author: reg-jya-xmltv@avenard.org

"""
		# Create the xml document
		doc = Document()
		
		#Add disclaimer to xmltv
		disclaimer_xml = doc.createComment(disclaimer)
		doc.appendChild(disclaimer_xml)
		
		# Create the <tv> base element
		tv_xml = doc.createElement("tv")
		tv_xml.setAttribute("source-info-name","TPG's IPTV from various web sites")
		tv_xml.setAttribute("generator-info-name","JY Avenard TPG's IPTV xmltv generator")
		tv_xml.setAttribute("generator-info-url", "http://www.avenard.org/iptv/")
		doc.appendChild(tv_xml)
		return doc, tv_xml
	
	def filterxmltv(self, data, timezone, offset=day_offset, days=maxdays, removepattern=False):
		"""Create xmltv file containing only relevant programmes and channels"""
		docxml = xml.dom.minidom.parse(StringIO.StringIO(data))
		doc , tv_xml = self.startdoc()

		#Build min and max time based on time offset and max days
		#If offset is 0, retrieve all the content available from the source
		delta = datetime.timedelta(days=offset)
		#Pytz has inverted timezone
		tz = pytz.timezone('Etc/GMT%+d' % (	-timezone / 100))
		mindate = datetime.datetime.now(tz=tz) + delta
		maxdate = mindate + datetime.timedelta(days=days)
		mindate = mindate.year * 10000000000 + mindate.month * 100000000 + mindate.day * 1000000
		maxdate = maxdate.year * 10000000000 + maxdate.month * 100000000 + maxdate.day * 1000000

		p1 = re.compile(r'([0-9]+)\s*([\+|-]([0-9]+))?')
		if removepattern:
			pattern = re.compile(removepattern[0])

		#Scan the programme list and flag the used channels
		okchannellist = {}
		programmelist = docxml.getElementsByTagName('programme')
		for x in programmelist:
			ignore = False
			channel = ''
			for y in x.attributes.keys():
				name = x.attributes[y].name
				value = x.attributes[y].value
				if name == 'start':
					#If before --offset, ignore
					m = p1.match(value)
					if not m:
						errorstr = 'Invalid Programme Start Date. Abort (%s)' % value 
						raise Error(errorstr, 500)
					if int(m.group(1)) < mindate:
						ignore = True
						break
					#If before --offset, ignore
					if int(m.group(1)) >= maxdate:
						ignore = True
						break
				if name == 'channel':
					channel = value
			if not ignore:
				if verbose > 1:
					print 'adding' , channel, 'in valid channel list'
				okchannellist[channel] = True
		if verbose > 1:
			print okchannellist	

		#Create the channel list
		channellist = docxml.getElementsByTagName('channel')

		for x in channellist:
			ignore = False
			for y in x.attributes.keys():
				if x.attributes[y].name == 'id':
					value = x.attributes[y].value
					if okchannellist.has_key(value):
						if verbose > 1:
							print 'Adding channel' , value
					else:
						if verbose > 1:
							print 'Ignoring channel' , value
						ignore = True
						break
						
			if not ignore:
				#Copy the original attributes 
				elem = doc.createElement("channel")
				for y in x.attributes.keys():
					name = x.attributes[y].name
					value = x.attributes[y].value
					if name == 'id' and removepattern:
						value = pattern.sub(removepattern[1],value)
					elem.setAttribute(name,value)
				for y in x.getElementsByTagName('display-name'):
					elem.appendChild(y.cloneNode(True))
				tv_xml.appendChild(elem)
				
		programmelist = docxml.getElementsByTagName('programme')
		for x in programmelist:
			ignore = False
			elem = doc.createElement("programme")
			#Copy the original attributes 
			for y in x.attributes.keys():
				name = x.attributes[y].name
				value = x.attributes[y].value
				if name == 'start':
					#If before --offset, ignore
					m = p1.match(value)
					if int(m.group(1)) < mindate:
						ignore = True
						break
					#If before --offset, ignore
					if int(m.group(1)) >= maxdate:
						ignore = True
						break
				if name == 'channel' and removepattern:
					value = pattern.sub(removepattern[1],value)
				elem.setAttribute(name,value)

			if not ignore:
				list_programmeelement = [ 'title', 'sub-title', 'desc', 'credits', 'date', 'category', 'language', 'orig-language', 'length',
											'icon', 'url', 'country', 'episode-num', 'video', 'audio', 'previously-shown',
											'premiere', 'last-chance', 'new', 'subtitles', 'rating', 'star-rating' ]
				#copy all the previous properties in the right order
				for y in list_programmeelement:
					for z in x.getElementsByTagName(y):
						elem.appendChild(z.cloneNode(True))
				tv_xml.appendChild(elem)
		return doc
	
	def mergechannels(self, docxml):
		#Create the channel list
		channellist = docxml.getElementsByTagName('channel')

		for x in channellist:
			#Copy the original attributes 
			elem = docxml.createElement("channel")
			for y in x.attributes.keys():
				elem.setAttribute(x.attributes[y].name,x.attributes[y].value)
			for y in x.getElementsByTagName('display-name'):
				elem.appendChild(y.cloneNode(True))
			self.tv_xml.appendChild(elem)
	
	def mergeprogrammes(self, docxml):
		programmelist = docxml.getElementsByTagName('programme')
		for x in programmelist:
			elem = self.doc.createElement("programme")
			#Copy the original attributes 
			for y in x.attributes.keys():
				name = x.attributes[y].name
				value = x.attributes[y].value
				elem.setAttribute(name,value)

			list_programmeelement = [ 'title', 'sub-title', 'desc', 'credits', 'date', 'category', 'language', 'orig-language', 'length',
									'icon', 'url', 'country', 'episode-num', 'video', 'audio', 'previously-shown',
									'premiere', 'last-chance', 'new', 'subtitles', 'rating', 'star-rating' ]
			#copy all the previous properties in the right order
			for y in list_programmeelement:
				for z in x.getElementsByTagName(y):
					elem.appendChild(z.cloneNode(True))
			self.tv_xml.appendChild(elem)
	
	def adjusttimezone(self, docxml, timezone):
		doc , tv_xml = self.startdoc()

		m = re.compile(r'([+-]?([0-9]{1,2})([0-9]{2}))').match(`timezone`)
		tz = int(m.group(1))
		tzh = int(m.group(2))
		tzm = int(m.group(3))
		if tz < 0:
			tzm = -tzm
			tzh = -tzh
		tzdelta = datetime.timedelta(hours=tzh,minutes=tzm)

		p1 = re.compile(r'([0-9]{4})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})\s*(([\+|-])([0-9]{2})([0-9]{2}))')
		#Create the channel list
		channellist = docxml.getElementsByTagName('channel')

		for x in channellist:
			#Copy the original attributes 
			elem = doc.createElement("channel")
			for y in x.attributes.keys():
				name = x.attributes[y].name
				value = x.attributes[y].value
				elem.setAttribute(name,value)
			for y in x.getElementsByTagName('display-name'):
				elem.appendChild(y.cloneNode(True))
			tv_xml.appendChild(elem)
				
		programmelist = docxml.getElementsByTagName('programme')
		for x in programmelist:
			elem = doc.createElement("programme")
			#Copy the original attributes 
			for y in x.attributes.keys():
				name = x.attributes[y].name
				value = x.attributes[y].value
				if name == 'start' or name == 'stop':
					m1 = p1.match(value)
					date = datetime.datetime(int(m1.group(1)), int(m1.group(2)), int(m1.group(3)), int(m1.group(4)), int(m1.group(5)), int(m1.group(6)))
					#Put back date in UTC
					if m1.group(7):
						if m1.group(8) == '+':
							date -= datetime.timedelta(hours=int(m1.group(9)),minutes=int(m1.group(10)))
						else:
							date += datetime.timedelta(hours=int(m1.group(9)),minutes=int(m1.group(10)))
					date += tzdelta
					startdate = date.year * 10000000000 + date.month * 100000000 + date.day * 1000000 + date.hour * 10000 + date.minute * 100
					value = '%014d' % startdate + ' %+05d' % tz
				elem.setAttribute(name,value)

			list_programmeelement = [ 'title', 'sub-title', 'desc', 'credits', 'date', 'category', 'language', 'orig-language', 'length',
									'icon', 'url', 'country', 'episode-num', 'video', 'audio', 'previously-shown',
									'premiere', 'last-chance', 'new', 'subtitles', 'rating', 'star-rating' ]
			#copy all the previous properties in the right order
			for y in list_programmeelement:
				for z in x.getElementsByTagName(y):
					elem.appendChild(z.cloneNode(True))
			tv_xml.appendChild(elem)
		return doc
	
	def scangenre(self, tab):
		"""Add a category to the programmes whose title reveals their genre.

		tab is a flat list of alternating field names and values. After every
		('title', value) pair whose value contains one of the known keywords
		(case insensitive), a ('category', genre) pair is inserted. A title
		matching several keywords still receives a single category.

		tab is modified in place and also returned.
		"""
		genre = { 'news' : 'News' , 'journal' : 'News' }
		#Walk backwards so that the insertions do not shift the pairs still to visit
		for i in reversed(range(len(tab) // 2)):
			if tab[i*2] != 'title':
				continue
			title = tab[i*2+1].lower()
			for keyword in genre:
				if keyword in title:
					tab[i*2+2:i*2+2] = [ 'category', genre[keyword] ]
					break
		return tab

	def textdecode(self,s, html=True, encoding='iso8859-1',replacecr=True):
		pcr = re.compile(r'^\s*|\s*$', re.UNICODE)
		pcr1 = re.compile(r'\n|\r', re.UNICODE)
		pspace = re.compile(r'\s+', re.UNICODE)
		
		p = re.compile('&(%s);' % '|'.join(htmlentitydefs.name2codepoint), re.UNICODE)
		s = unicode(unicode(s, encoding).encode('utf-8'),'utf-8')

		#Beautiful one-liner I found to replace html character code into ISO-8859-1
		if html:
			s = p.sub(lambda m: unichr(htmlentitydefs.name2codepoint[m.group(1)]), s)

		if replacecr:
			s = pcr1.sub(' ', s)

		s = pspace.sub(' ', s)

		#Remove leading and trailing space and CR
		return pcr.sub('', s)

	def addchannel(self, channel_name, channel_id, lang=None):
		# Create <channel> element
		channel_xml = self.doc.createElement("channel")
		channel_xml.setAttribute("id",channel_id)
		# Create <display-name> element
		displayname_xml = self.doc.createElement("display-name")
		if lang:
			displayname_xml.setAttribute("lang",lang)
		channel_xml.appendChild(displayname_xml)
		ptext = self.doc.createTextNode(channel_name)
		displayname_xml.appendChild(ptext)
		self.tv_xml.appendChild(channel_xml)

	def addqueueprogramme(self, processed, channelid, offset=0, days=8, dst=0, timezone=0, startdate=None, marker='time', encoding='iso8859-1'):
		doc = self.doc
		tv_xml = self.tv_xml
		channelname = channelid
		monthvalue = {	'janvier' : 1 , 'fevrier' : 2, 'mars' : 3 , 'avril' : 4, 'mai' : 5, 'juin' : 6, 'juillet' : 7, 'aout' : 8, 'septembre' : 9, 'octobre' : 10, 'novembre' : 11, 'decembre' : 12,
		 				'january' : 1 , 'february' : 2, 'march' : 3 , 'april' : 4, 'may' : 5, 'june' : 6, 'july' : 7, 'august' : 8, 'september' : 9, 'october' : 10, 'november' : 11, 'december' : 12,
		 				'jan' : 1 , 'feb' : 2, 'mar' : 3 , 'apr' : 4, 'may' : 5, 'jun' : 6, 'jul' : 7, 'aug' : 8, 'sep' : 9, 'oct' : 10, 'nov' : 11, 'dec' : 12
					}
		month_day = [ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
		#Calculate date wintertime and date summertime
		scannedprogramme = 0
		timezoneori = timezone
		lastsundayw = monthwinter * 100 + 31 - datetime.datetime(datetime.datetime.today( ).year, monthwinter, 31).isoweekday( ) % 7
		lastsundays = monthsummer * 100 + 31 - datetime.datetime(datetime.datetime.today( ).year, monthsummer, 31).isoweekday( ) % 7
		#Hack to handle TV5 incorrect time change for 2008.
		changetimetoday = 0

		#Build min and max time based on time offset and max days
		#If offset is 0, retrieve all the content available from the source
		delta = datetime.timedelta(days=offset)
		#Pytz has inverted timezone
		tz = pytz.timezone('Etc/GMT%+d' % (	-timezone / 100))
		mindate = datetime.datetime.now(tz=tz) + delta
		maxdate = mindate + datetime.timedelta(days=days)
		mindate = mindate.year * 10000000000 + mindate.month * 100000000 + mindate.day * 1000000
		maxdate = maxdate.year * 10000000000 + maxdate.month * 100000000 + maxdate.day * 1000000

		marker = { 'date':1 , 'time':2, 'title':3, 'category':4, 'subtitle':5, 'description':6 }[marker]
		state = ignore = 0
		if not startdate:
			currentdate = datetime.datetime.now(tz=tz)
			currentdate = datetime.datetime(currentdate.year, currentdate.month, currentdate.day)
		#Create a stack of two programmes
		plist = []
		plist.append({})
		plist[0]['time'] = currentdate
		plist[0]['tz'] = timezoneori
		plist[0]['title'] = ''
		plist[0]['subtitle'] = ''
		plist[0]['category'] = ''
		plist[0]['description'] = ''
		plist.append(copy.deepcopy(plist[0]))
		title = category = ''
		subtitle = []
		description = []
		reempty = re.compile(r'\s*$')
		#different possible states:
		#	1 = date
		#	2 = time
		#	3 = title
		#	4 = category
		#	5 = subtitle
		#	6 = description

		for loop in range(len(processed) / 2):
			x = processed[loop*2]
			if x == 'date': 
				state = 1
				#Handle the date
			elif x == 'time':
				state = 2
			elif x == 'title':
				state = 3
			elif x == 'category':
				state = 4
			elif x == 'subtitle':
				state = 5
			elif x == 'desc':
				state = 6
			#Not a valid field, just skip it
			else:
				continue

			x = processed[loop*2+1]

			if verbose > 1:
				print 'x' , x , 'state', state, 'ignore', ignore

			#If we got a new marker, time to process the programme
			if state == marker:
				if verbose > 1:
					print 'processing new programme'
				if not ignore:
					#New entry, reset the ignore flag

					#Calculate the DST value
					timezone = timezoneori

					plist[0] = copy.deepcopy(plist[1])
					plist[1]['time'] = currentdate
					plist[1]['tz'] = timezone

					plist[1]['title'] = title
					plist[1]['subtitle'] = ''
					for i in subtitle:
						plist[1]['subtitle'] += i + ' '
					plist[1]['category'] = category
					plist[1]['description'] = ''
					for i in description:
						plist[1]['description'] += i + ' '
					title = category = ''
					description = []
					subtitle = []

					if verbose > 1:
						print 'scanned programme', scannedprogramme
					if verbose > 2:
						print 'plist=', plist

					if scannedprogramme > 0 and verbose > 0:
						print 'date = ' + `plist[1]['time'].day` + ' title = ' + plist[1]['title'] + ' time: %02d:%02d %+05d' % (plist[1]['time'].hour,plist[1]['time'].minute, plist[1]['tz'])

					if scannedprogramme >= 2:
						#We're starting a new program, so print the previous one if it exist

						startdate = plist[0]['time'].year * 10000000000 + plist[0]['time'].month * 100000000 + plist[0]['time'].day * 1000000 + plist[0]['time'].hour * 10000 + plist[0]['time'].minute * 100
						stopdate  = plist[1]['time'].year * 10000000000 + plist[1]['time'].month * 100000000 + plist[1]['time'].day * 1000000 + plist[1]['time'].hour * 10000 + plist[1]['time'].minute * 100
						if startdate >= maxdate:
							break
						if startdate >= mindate:
							startstr = '%014d' % startdate + ' %+05d' % plist[0]['tz']
							stopstr = '%014d' % stopdate + ' %+05d' % plist[1]['tz']
							if verbose > 0:
								print 'start=%s' % startstr + ' stop=%s' % stopstr
							programme = doc.createElement("programme")
							programme.setAttribute("channel", channelname)
							programme.setAttribute("start", startstr)
							programme.setAttribute("stop", stopstr)
							tv_xml.appendChild(programme)

							if plist[0]['title'] != '':
								# Create a <title> element
								title_xml = doc.createElement("title")
								programme.appendChild(title_xml)
								# Give the <title> element some text
								ptext = doc.createTextNode(self.textdecode(plist[0]['title'],encoding=encoding,replacecr=True))
								title_xml.appendChild(ptext)
							else:
								errorstr = 'Error: title must not be empty: %s - %s' % (channelname,startstr)
								errorstr += "\nIt's likely you are getting this error as the source page have changed its layout. Please contact the author to let him know"
								raise Error(errorstr,300)

							if plist[0]['subtitle'] != '':
								text = self.textdecode(plist[0]['subtitle'],encoding=encoding,replacecr=True)
								if text != '':
									ptext = doc.createTextNode(text)
									# Create a <sub-title> element
									subtitle_xml = doc.createElement("sub-title")
									programme.appendChild(subtitle_xml)
									subtitle_xml.appendChild(ptext)

							if plist[0]['description'] != '':
								text = self.textdecode(plist[0]['description'],encoding=encoding,replacecr=True)
								if text != '':
									ptext = doc.createTextNode(text)
									# Create a <desc> element
									desc_xml = doc.createElement("desc")
									programme.appendChild(desc_xml)
									# Give the <desc> element some text
									desc_xml.appendChild(ptext)

							if plist[0]['category'] != '':
								# Create a <category> element
								category_xml = doc.createElement("category")
								programme.appendChild(category_xml)
								# Give the <desc> elemenet some text
								plist[0]['category'] = string.replace(plist[0]['category'],' [','')
								plist[0]['category'] = string.replace(plist[0]['category'],']','')
								ptext = doc.createTextNode(self.textdecode(plist[0]['category'],encoding=encoding,replacecr=True))
								category_xml.appendChild(ptext)
					scannedprogramme += 1
				else:
					ignore = 0
				
			if state == 1:
				m = re.compile(r'\s*\S*?\s*([0-9]+) (\S+?)[,\s]*([0-9]+)|([0-9]+)/\s*([0-9]+)/([0-9]+)|^([a-zA-Z])+$').match(x)
				if m:
					if m.group(5):
						currentdate_tmp = datetime.datetime(int(m.group(6)),int(m.group(5)),int(m.group(4)))
					elif m.group(2) and monthvalue.has_key(m.group(2).lower()):
						currentdate_tmp = datetime.datetime(int(m.group(3)),monthvalue[m.group(2).lower()],int(m.group(1)))
					else:
						raise Error('%s: date is not valid (received %s)' % (channelid, x),301)
					#If we have already calculated the new date, no need to replace as we would loose the time otherwise
					if verbose > 1:
						print 'date field', currentdate, currentdate_tmp
					if not (currentdate_tmp.year == currentdate.year and currentdate_tmp.month == currentdate.month and currentdate_tmp.day == currentdate.day):
						currentdate = currentdate_tmp

					#Calculate DST
					timezone = timezoneori
				else:
					raise Error('%s: date is not valid (received %s)' % (channelid, x),302)
			elif state == 2:
				m = re.compile(r'(&nbsp;|\s)*([0-9]*/[0-9]*/[0-9]*(&nbsp;|\s)*)*([0-2]?[0-9])[:\.]?([0-9]{2})\s*(AM|PM|am|pm)?', re.UNICODE).match(x)
				if m:
					hour = int(m.group(4))
					minute = int(m.group(5))
					if hour == 24:
						hour = 0
					if m.group(6):
						#time is 12AM -> 00:00
						if m.group(6).lower() == 'am' and hour == 12:
							hour = 0
						#time is xxPM -> add 12 hours
						elif m.group(6).lower() == 'pm' and hour < 12:
							hour += 12
					currentdate = datetime.datetime(currentdate.year, currentdate.month, currentdate.day, hour, minute)
					if hour < plist[1]['time'].hour and currentdate.day == plist[1]['time'].day and currentdate.month == plist[1]['time'].month:
						delta = datetime.timedelta(days=1)
						currentdate += delta
				else:
					raise Error('Invalid time for %s (got: %s) in %s' % (channelid,repr(x),repr(currentdate)) ,303)

			elif state == 3:							#New title, process the previous entry and add it to the xml tree
				#Check if TV5 just changed the time
				if re.compile(r"changement d\'heure").match(x.lower()):
					if verbose > 1:
						print 'Change DST'
					ignore = 1
					if dst:
						#Set changetimetoday if DST is scheduled to happen today
						changetimetoday = 1
				else:
					title = x
			elif state == 4 and not ignore:
				category = x
			elif state == 5 and not ignore:
				subtitle.append(x)
			elif state == 6 and not ignore:
				description.append(x)
			
			state = 0
		
		#Finish last entry
		if verbose > 1:
			print 'processing new programme'
		if not ignore:
			#New entry, reset the ignore flag

			#Calculate the DST value
			timezone = timezoneori

			plist[0] = copy.deepcopy(plist[1])
			plist[1]['time'] = currentdate
			plist[1]['tz'] = timezone
			plist[1]['title'] = title
			plist[1]['subtitle'] = ''
			for i in subtitle:
				plist[1]['subtitle'] += i + ' '
			plist[1]['category'] = category
			plist[1]['description'] = ''
			for i in description:
				plist[1]['description'] += i + ' '
			title = category = ''
			description = []
			subtitle = []

			if verbose > 1:
				print 'scanned programme', scannedprogramme
			if verbose > 2:
				print 'plist=', plist

			if scannedprogramme > 0 and verbose > 0:
				print 'date = ' + `plist[1]['time'].day` + ' title = ' + plist[1]['title'] + ' time: %02d:%02d %+05d' % (plist[1]['time'].hour,plist[1]['time'].minute, plist[1]['tz'])

			if scannedprogramme >= 2:
				#We're starting a new program, so print the previous one if it exist

				startdate = plist[0]['time'].year * 10000000000 + plist[0]['time'].month * 100000000 + plist[0]['time'].day * 1000000 + plist[0]['time'].hour * 10000 + plist[0]['time'].minute * 100
				stopdate  = plist[1]['time'].year * 10000000000 + plist[1]['time'].month * 100000000 + plist[1]['time'].day * 1000000 + plist[1]['time'].hour * 10000 + plist[1]['time'].minute * 100
				if startdate >= maxdate:
					return
				if startdate >= mindate:
					startstr = '%014d' % startdate + ' %+05d' % plist[0]['tz']
					stopstr = '%014d' % stopdate + ' %+05d' % plist[1]['tz']
					if verbose > 0:
						print 'start=%s' % startstr + ' stop=%s' % stopstr
					programme = doc.createElement("programme")
					programme.setAttribute("channel", channelname)
					programme.setAttribute("start", startstr)
					programme.setAttribute("stop", stopstr)
					tv_xml.appendChild(programme)

					if plist[0]['title'] != '':
						# Create a <title> element
						title_xml = doc.createElement("title")
						programme.appendChild(title_xml)
						# Give the <title> element some text
						ptext = doc.createTextNode(self.textdecode(plist[0]['title'],encoding=encoding,replacecr=True))
						title_xml.appendChild(ptext)
					else:
						errorstr = 'Error: title must not be empty: %s - %s' % (channelname,startstr)
						errorstr += "\nIt's likely you are getting this error as the source page have changed its layout. Please contact the author to let him know"
						raise Error(errorstr,300)

					if plist[0]['subtitle'] != '':
						text = self.textdecode(plist[0]['subtitle'],encoding=encoding,replacecr=True)
						if text != '':
							ptext = doc.createTextNode(text)
							# Create a <sub-title> element
							subtitle_xml = doc.createElement("sub-title")
							programme.appendChild(subtitle_xml)
							subtitle_xml.appendChild(ptext)

					if plist[0]['description'] != '':
						text = self.textdecode(plist[0]['description'],encoding=encoding,replacecr=True)
						if text != '':
							ptext = doc.createTextNode(text)
							# Create a <desc> element
							desc_xml = doc.createElement("desc")
							programme.appendChild(desc_xml)
							# Give the <desc> element some text
							desc_xml.appendChild(ptext)

					if plist[0]['category'] != '':
						# Create a <category> element
						category_xml = doc.createElement("category")
						programme.appendChild(category_xml)
						# Give the <desc> elemenet some text
						plist[0]['category'] = string.replace(plist[0]['category'],' [','')
						plist[0]['category'] = string.replace(plist[0]['category'],']','')
						ptext = doc.createTextNode(self.textdecode(plist[0]['category'],encoding=encoding,replacecr=True))
						category_xml.appendChild(ptext)
			scannedprogramme += 1


	def toxml(self,docxml=False):
		if docxml:
			return docxml.toxml(encoding="utf-8")
		else:
			return self.doc.toxml(encoding="utf-8")

	def toprettyxml(self,docxml=False):
		if docxml:
			return docxml.toprettyxml(indent="  ",encoding="utf-8")
		else:
			return self.doc.toprettyxml(indent="  ",encoding="utf-8")

def calc_timezone(date):
	"""Return the UTC offset of the timezone-aware datetime date as an integer +/-HHMM.

	For example +10:00 gives 1000, -05:00 gives -500 and +05:30 gives 530.
	The offset is split into hours and minutes (dividing the seconds by 36
	would turn half an hour into 50) and its sign comes from the timedelta
	itself, so offsets beyond +12:00 stay positive.
	"""
	offset = date.utcoffset()
	#A negative timedelta is stored as negative days plus positive seconds
	total = offset.days * 86400 + offset.seconds
	sign = 1
	if total < 0:
		sign = -1
		total = -total
	hours, minutes = divmod(total // 60, 60)
	return sign * (hours * 100 + minutes)

class TVGRAB:
	def time_title(self,result):
		#format is XX:XX Title, separate time and title
		p = re.compile(r'\s*\[?([0-9]{1,2}(:|\.)[0-9]{2})\]?\s*(.*?)\s*$', re.UNICODE)
		result2 = []
		ignore = False
		for i in range(len(result)/2):
			x = result[i*2]
			y = result[i*2+1]
			if verbose > 1:
				print 'x=',x,'y=',repr(y)
			if x == 'time':
				m = p.match(y)
				if m:
					result2.append('time')
					result2.append(m.group(1))
					result2.append('title')
					result2.append(m.group(3))
					ignore = False
				else:
					ignore = True
			elif not ignore:
				result2.append(x)
				result2.append(y)
			elif verbose > 1:
				print 'ignoring'
		return result2

	def main(self, conf, output=sys.stdout, outfilename=False, reuse=False, pretty=False, converttz=False, timezone=0000,maxdays=7,offset=0):

		finishtemplate = { 'id': False, 'name': False, 'timezone': False, 'dst': 0, 'result': False, 'lang': False , 'marker': 'time', 'encoding': 'iso8859-1'}
		finishtab= []
		output2 = output
		xmltv = XMLTV()

		if reuse:
			if outfilename:
				output2 = open(outfilename,'w')
			if converttz:
				doc = xmltv.adjusttimezone(reuse.doc,timezone)
				if pretty:
					output2.write(xmltv.toprettyxml(docxml=doc))
				else:
					output2.write(xmltv.toxml(docxml=doc))
			else:
				doc = reuse.doc
				if pretty:
					output2.write(reuse.toprettyxml())
				else:
					output2.write(reuse.toxml())
			
			if outfilename:
				output2.close()
			return doc
			
	#Process Al Jazeera data
		if grab_channel['aljazeera']:
			if verbose > 0:
				print 'Processing Al JAzeera'
			try:
				handle = Web(ajurl)
			except Error, e:
				print >> sys.stderr, 'Error:', e.code, e.message
			else:
				# Description of Radio Times data fields (23 in total) (extracted from tv_grab_uk_rt:
				#
				# title - the programme title (text)
				# sub_title - infrequently defined - preference is given to episode
				#             if defined (text)
				# episode - the name of a particular episode of the programme and/or 
				#           the episode's position in the current series (text)
				# year - the year of production (text)
				# director - the programme's director(s) (text)
				# cast - the programme's cast (may include character details) (text)
				# premiere - whether this is a film's first showing (boolean)
				# film - whether the programme is a film (boolean)
				# repeat - whether the programme has been shown before (boolean)
				# subtitles - whether subtitles are available (boolean)
				# widescreen - whether the broadcast is 16:9 widescreen (boolean)
				# new_series - whether the programme is the first episode in a 
				#              series new (boolean)
				# deaf_signed - whether in-vision signing is available (boolean)
				# blank_and_white - whether the broadcast is not in colour (boolean)
				# star_rating - a star rating between 0 and 5 for films (text)
				# certificate - the BBFC certificate for the programme (text)
				# genre - the genre of the programme (text)
				# desc - a description of the programme. Can be a specific review by a
				#        Radio Times reviewer (text)
				# choice - whether the programme is recommended by the 
				#          Radio Times (boolean)
				# date - the transmission date (text)
				# start - the transmission start time for the programme (text)
				# stop - the transmissions stop time for the programme (text)
				# duration_mins - the duration of the programme in minutes (text)
				resultaj = []
			#	programmeformat = [ 'title', 'subtitle','episode', 'year', 'director', 'cast', 'premiere', 'film', 'repeat', 'subtitles', 'widescreen',
			#	 					'new_series', 'deaf_signed', 'colour', 'rating', 'certificate', 'genre', 'desc', 'choice', 'date', 'start', 'stop', 'duration' ]
				programmeformat = [ 'title', '','subtitle', '', '', '', '', '', '', '', '',
									'', '', '', '', '', 'category', 'desc', '', 'date', 'time', '', '' ]
				tz = pytz.timezone('Europe/London')
				date = datetime.datetime.now(tz=tz)
				#Rebuild list of programme item for XMLTV class
				while True:
					x = handle.readline()
					if not x:
						break
					x = string.replace(x,'\r','')
					x = string.replace(x,'\n','')
					if verbose > 1:
						print 'Reading :', x
					tab = re.compile(r"~").split(x)
					if len(tab) == 23:
						for i in range(len(programmeformat)):
							if tab[i] and programmeformat[i]:
								#If no category, ignore
								if not (programmeformat[i] == 'category' and tab[i] == 'No Genre'):
									resultaj.append(programmeformat[i])
									resultaj.append(tab[i])
					else:
						if verbose > 0:
							print 'incorrect programme field, only %d fields' % len(tab)
				if verbose > 0:
					print 'Done grabbing data.. processing'
				resultaj = xmltv.scangenre(resultaj)

				if not resultaj:
					print >> sys.stderr, "Couldn't extract any content from Al Jazeera"
					#raise Error("Couldn't extract any content from Al Jazeera",401)
				else:
					#Add it to the processed table
					finishtab.append(copy.deepcopy(finishtemplate))
					finishtab[-1]['id'] = ajidname
					finishtab[-1]['result'] = resultaj
					finishtab[-1]['lang'] = 'en'
					finishtab[-1]['name'] = ajchannelname
					finishtab[-1]['timezone'] = calc_timezone(date)
					finishtab[-1]['dst'] = 0
					finishtab[-1]['marker'] = 'title'
		else:
			if verbose > 0:
				print 'Ignoring Al Jazeera'

		if grab_channel['bloomberg']:
			if verbose > 0:
				print 'Processing Bloomberg'
			#Calculate timezone
			tz = pytz.timezone('Asia/Hong_Kong')
			date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset)
			weekday = date.isoweekday() % 7
			#Read 7 days maximum worth of programme
			days = min(7-offset, maxdays+1)
			parser = Scraper(statebloomberg)
			day_tab = [ 'sunday', 'monday' , 'tuesday' , 'wednesday', 'thursday', 'friday', 'saturday' ]
			for i in range(days):
				url = '%s_%s.html' % ( bloombergurl, day_tab[weekday])
				if verbose > 0: 
					print 'Reading url=%s' % url
				try:
					handle = Web(url)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					parser.feed(data,ignoretag='br')
					weekday = (weekday + 1) % 7

			result = parser.processed

			parser.close()
			if not result:
				print >> sys.stderr, "Couldn't extract any content from Bloomberg"
				#raise Error("Couldn't extract any content from Bloomberg",412)
			else:
				result[:0] = ['date','%02d/%02d/%04d' % (date.day,date.month,date.year)]
				result = xmltv.scangenre(result)
				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = bloombergidname
				finishtab[-1]['result'] = result
				finishtab[-1]['lang'] = 'en'
				finishtab[-1]['name'] = bloombergchannelname
				finishtab[-1]['timezone'] = calc_timezone(date)
				finishtab[-1]['encoding'] = 'utf-8'
				finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring Bloomberg'

		if grab_channel['newsasia']:
			if verbose > 0:
				print 'Processing NewsAsia'
			#Calculate timezone in Singapore
			tz = pytz.timezone('Singapore')
			date = datetime.datetime.now(tz=tz)
			#Read 8 days maximum worth of programme
			days = min(8, maxdays+1)
			parser = Scraper(statenewsasia)
			for i in range(days):
				url = '%s?day=%d' % ( newsasiaurl , i + offset - 1)
				if verbose > 0: 
					print 'Reading url=%s' % url
				try:
					handle = Web(url)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					parser.feed(data,ignoretag='br')

			newsasiaresult = parser.processed
			parser.close()
			if not newsasiaresult:
				print >> sys.stderr, "Couldn't extract any content from NewsAsia"
				#raise Error("Couldn't extract any content from NewsAsia",406)
			else:
				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = newsasiaidname
				finishtab[-1]['result'] = newsasiaresult
				finishtab[-1]['lang'] = 'en'
				finishtab[-1]['name'] = newsasiachannelname
				finishtab[-1]['timezone'] = calc_timezone(date)
				finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring NewsAsia'

		if grab_channel['worldfashion']:
			if verbose > 0:
				print 'Processing World of Fashion'
			tz = pytz.timezone('Europe/Moscow')
			date1 = date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset-1)
			#Read 3 days maximum worth of programme, put maxdays+1 so we can get info about the last program of the day
			days = min(4-offset, maxdays+1)
			parser = Scraper(statewof)
			for i in range(days):
				url = '%s?date=%02d.%02d.%04d' % (wofurl, date.day, date.month, date.year)
				if verbose > 0:
					print 'Reading url=%s' % url
				try:
					handle = Web(url)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					parser.feed(data,ignoretag='br')
					date += datetime.timedelta(days=1)

			result = parser.processed
			parser.close()
			
			if not result:
				print >> sys.stderr, "Couldn't extract any content from World of Fashion"
				#raise Error("Couldn't extract any content from World of Fashion",415)
			else:
				result[:0] = ['date','%02d/%02d/%04d' % (date1.day,date1.month,date1.year)]
				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = wofidname
				finishtab[-1]['result'] = result
				finishtab[-1]['lang'] = 'en'
				finishtab[-1]['name'] = wofchannelname
				finishtab[-1]['timezone'] = calc_timezone(date1)
				finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring World of Fashion'

		if grab_channel['eurosport']:
			if verbose > 0:
				print 'Processing Eurosport'
			#Calculate timezone
			tz = pytz.timezone('Etc/GMT')
			offset2 = offset
			if offset > 0: offset2 -= 1
			date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset2)
			#Read 8 days maximum worth of programme
			days = min(8-offset, maxdays+1)
			parser = Scraper(stateeurosport)
			for i in range(days):
				url = '%s_day%d.shtml' % ( eurosporturl, i+offset)
				if verbose > 0: 
					print 'Reading url=%s' % url
				try:
					handle = Web(url)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					#Search for "Eurosport World", as this is where the programme starts
					start = data.find('alt="Eurosport World"')
					if start > 0:
						data = '<img ' + data[start:]
						parser.feed(data,ignoretag='br')

			eurosportresult = parser.processed
			parser.close()
			if not eurosportresult:
				print >> sys.stderr, "Couldn't extract any content from Eurosport"
				#raise Error("Couldn't extract any content from Eurosport",411)
			else:
				eurosportresult[:0] = ['date','%02d/%02d/%04d' % (date.day,date.month,date.year)]
				eurosportresult = xmltv.scangenre(eurosportresult)
				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = eurosportidname
				finishtab[-1]['result'] = eurosportresult
				finishtab[-1]['lang'] = 'en'
				finishtab[-1]['name'] = eurosportchannelname
				finishtab[-1]['timezone'] = calc_timezone(date)
				finishtab[-1]['encoding'] = 'utf-8'
				finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring Eurosport'

		if grab_channel['eurosportnews']:
			if verbose > 0:
				print 'Processing Eurosport News'
			#Calculate timezone
			tz = pytz.timezone('Australia/Melbourne')
			date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset)
			#Generate 8 days maximum worth of programme
			days = min(8, maxdays+1)
			result = []
			for i in range(days):
				for j in range(24):
					result.append('time')
					result.append('%02d:00' % (j%24))
					result.append('title')
					result.append('Eurosport News')
					result.append('desc')
					result.append('Interactive sports programme, with rolling sports news and updates every 15 minutes.')

			result[:0] = ['date','%02d/%02d/%04d' % (date.day,date.month,date.year)]
			finishtab.append(copy.deepcopy(finishtemplate))
			result = xmltv.scangenre(result)
			finishtab[-1]['id'] = eurosportnewsidname
			finishtab[-1]['result'] = result
			finishtab[-1]['lang'] = 'en'
			finishtab[-1]['name'] = eurosportnewschannelname
			finishtab[-1]['timezone'] = calc_timezone(date)
			finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring Eurosport News'

		if grab_channel['dw']:
			if verbose > 0:
				print 'Processing DW'
			#Generate URL
			tz = pytz.timezone('UTC')
			date1 = date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset)
			#Read web site
			data = ''
			parser = Scraper(statedw,completeclass=True)
			#DW only has 7 days worth of data from today
			days = min(7-offset, maxdays+1)
			for i in range(days):
				url = '%s?wday=%d&sprache=gb&schiene=dwtvasien&to=0' % (dwurl, date.isoweekday())
				if verbose > 0:
					print 'Reading url=%s' % url
				try:
					handle = Web(url)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					parser.feed(data,ignoretag='br')
					date += datetime.timedelta(days=1)
			result = parser.processed
			parser.close()
			if not result:
				print >> sys.stderr, "Couldn't extract any content from DW"
				#raise Error("Couldn't extract any content from DW",404)
			else:
				result = xmltv.scangenre(result)
				result[:0] = ['date','%02d/%02d/%04d' % (date1.day,date1.month,date1.year)]	
				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = dwidname
				finishtab[-1]['result'] = result
				finishtab[-1]['lang'] = 'dw'
				finishtab[-1]['name'] = dwchannelname
				finishtab[-1]['timezone'] = calc_timezone(date)
				finishtab[-1]['encoding'] = 'utf-8'
				finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring DW'

		if grab_channel['trtint']:
			if verbose > 0:
				print 'Processing TRT International'
			#Generate URL
			tz = pytz.timezone('Asia/Istanbul')
			offset2 = offset
			#Will read one day early to get data from midnight (TRT starts at 6AM)
			if offset > 0:
				offset2 -= 1
			date1 = date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset2)
			#Read web site
			data = ''
			parser = Scraper(statetrt,completeclass=True)
			#TRT only has 7 days worth of data from today
			days = min(7-offset2, maxdays+1)
			counter = 0
			for i in range(days):
				url = '%s?gunler=%d&kanal=6&akistur=1&tdgun=%d&control=0' % (trturl, date.isoweekday() % 7, counter+offset2)
				if verbose > 0:
					print 'Reading url=%s' % url
				try:
					handle = Web(url)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					parser.feed(data,ignoretag='br')
					date += datetime.timedelta(days=1)
					counter += 1
			result = parser.processed
			parser.close()
			if not result:
				print >> sys.stderr, "Couldn't extract any content from TRT International"
				#raise Error("Couldn't extract any content from TRT International",404)
			else:
				result[:0] = ['date','%02d/%02d/%04d' % (date1.day,date1.month,date1.year)]	
				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = trtidname
				finishtab[-1]['result'] = result
				finishtab[-1]['lang'] = 'tr'
				finishtab[-1]['name'] = trtchannelname
				finishtab[-1]['timezone'] = calc_timezone(date)
				finishtab[-1]['encoding'] = 'utf-8'
				finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring TRT International'

	#Process lyngsat channels
	#lyngsat format: 0: id, 1: url, 2: name, 3: language, 4: timezone, 5: dst
		for x in lyngsat:
		
			if grab_channel[x[0]]:
				if verbose > 0:
					print 'Processing ' + x[0]
				#Read web site
				tz = pytz.timezone('Europe/Paris')
				date = datetime.datetime.now(tz=tz)
				url = x[1] + '?offset=0'
				if verbose > 0: 
					print 'Reading url=%s' % url
				try:
					handle = Web(url)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					url = x[1] + '?offset=1'
					if verbose > 0: 
						print 'Reading url=%s' % url
					handle = Web(url)
					data = data + handle.read()
					handle.close()
					if verbose > 0:
						print 'Done grabbing data.. processing'
					#Extract HTML
					parser = Scraper(statelyngsat)
					parser.feed(data,ignoretag='br')
					resultlyngsat = parser.processed
					parser.close()
					#Add automatic category based on title
					resultlyngsat = xmltv.scangenre(resultlyngsat)
					if not resultlyngsat:
						print >> sys.stderr, "Couldn't extract any content from %s" % x[0]
						#raise Error("Couldn't extract any content from %s" % x[0], 403)
					else:
						resultlyngsat[:0] = ['date','%02d/%02d/%04d' % (date.day,date.month,date.year)]	

						#Add it to the processed table
						finishtab.append(copy.deepcopy(finishtemplate))
						finishtab[-1]['id'] = x[0]
						finishtab[-1]['result'] = resultlyngsat
						finishtab[-1]['lang'] = x[3]
						finishtab[-1]['name'] = x[2]
						finishtab[-1]['timezone'] = calc_timezone(date)
						finishtab[-1]['dst'] = x[5]
			else:
				if verbose > 0:
					print 'Ignoring ' + x[0]

	#Process TVE
		if grab_channel['tve']:
			if verbose > 0:
				print 'Processing TVE'
			tz = pytz.timezone('Europe/Madrid')
			#TVE page starts at 6AM for each day, so start fetching the previous day too
			date1 = date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset - 1)
			#Grab up to 8 days worth of data
			j = min(8-offset, maxdays+1)
			data = ''
			parser = Scraper(statetve)
			for i in range(j):
				x = '%02d%02d' % (date.day, date.month)
				url = tveurl + x + '.htm'
				if verbose > 0: 
					print 'Reading url=%s' % url
				try:
					handle = Web(url)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					parser.feed(data,emptyfield=False,ignoretag='br')
					handle.close()
				date += datetime.timedelta(days=1)
			resulttve = parser.processed
			parser.close()

			#TVE has shocking html, so purely extract all possible text and run regular expression to extract the time and title
			textextract = ''
			for i in range (len(resulttve)/2):
				x = resulttve[i*2]
				y = resulttve[i*2+1]
				if x == 'title':
					textextract += '\n' + y
				#Remove date field
				elif x == 'date':
					continue
				else:
					textextract += ' ' + y
				
			p = re.compile(r'\s*([0-9]{1,2}:[0-9]{2})(:[0-9]{2})?\s*(-|&ndash;)*\s*(.*)\s*?([\s\S]*?)(\s|&nbsp;)*([0-9]{1,2}:[0-9]{2}(:[0-9]{2})?\s*(-|&ndash;)*\s*[\s\S]*)')
			resulttve2 = []
			finished = False

			while not finished:
				m = p.search(textextract)
				if m:
					if verbose > 2:
						print 'group', m.groups()
					resulttve2.append('time')
					resulttve2.append(m.group(1))
					resulttve2.append('title')
					resulttve2.append(m.group(4))
					description = m.group(5)
					if description:
						resulttve2.append('desc')
						resulttve2.append(description)
					textextract = m.group(7)
				else:
					finished = True
			#Retrieved last entry
			p = re.compile(r'\s*([0-9]{1,2}:[0-9]{2})(:[0-9]{2})?\s*(-|&ndash;)*\s*(.*)\s*?([\s\S]*?)')
			m = p.search(textextract)
			if m:
				if verbose > 2:
					print m.groups()
				resulttve2.append('time')
				resulttve2.append(m.group(1))
				resulttve2.append('title')
				resulttve2.append(m.group(4))
				description = m.group(5)
				if description:
					resulttve2.append('desc')
					resulttve2.append(description)

			if not resulttve2:
				print >> sys.stderr, "Couldn't extract any content from TVE"
				#raise Error("Couldn't extract any content from TVE",402)
			else:
				#Add date
				resulttve2[:0] = ['date','%02d/%02d/%04d' % (date1.day,date1.month,date1.year)]	
				#Add it to the processed table
				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = tveidname
				finishtab[-1]['result'] = resulttve2
				finishtab[-1]['lang'] = 'es'
				finishtab[-1]['name'] = tvechannelname
				finishtab[-1]['timezone'] = calc_timezone(date1)
				finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring TVE'

		if grab_channel['cctv']:
			if verbose > 0:
				print 'Processing CCTV'
			#Generate URL
			tz = pytz.timezone('Asia/Shanghai')
			for channel in cctvchannels:
				date1 = date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset)
				parser = Scraper(statecctv,completeclass=True)
				days = min(8-offset, maxdays+1)
				for i in range(days):
					url = '%s%s/%02d/%04d%02d%02d.shtml' % (googletranslatechinese, cctvurl, channel['number'], date.year, date.month, date.day)
					if verbose > 0:
						print 'Reading url=%s' % url
					try:
						handle = Web(url)
					except Error, e:
						print >> sys.stderr, 'Error:', e.code, e.message
					else:
						data = handle.read()
						handle.close()
						parser.feed(data,ignoretag='br')
						date +=  datetime.timedelta(days=1)
				result = parser.processed
				parser.close()
				if not result:
					print >> sys.stderr, "Couldn't extract any content from CCTV"
					#raise Error("Couldn't extract any content from CCTV",415)
				else:
					result[:0] = ['date','%02d/%02d/%04d' % (date1.day,date1.month,date1.year)]	
					result = xmltv.scangenre(result)
					finishtab.append(copy.deepcopy(finishtemplate))
					finishtab[-1]['id'] = channel['id']
					finishtab[-1]['result'] = result
					finishtab[-1]['lang'] = 'cn'
					finishtab[-1]['name'] = channel['name']
					finishtab[-1]['timezone'] = calc_timezone(date1)
					finishtab[-1]['dst'] = 0
					finishtab[-1]['encoding'] = 'utf-8'
		else:
			if verbose > 0:
				print 'Ignoring CCTV'


		if grab_channel['cuba']:
			if verbose > 0:
				print 'Processing Cuba Vision'
			#Generate URL
			#Cuba Vision URL is in the format 'url'?Dia=x
			#Calculate today's day of the week, Cuba time (GMT -4): 0->Sunday, 1->Monday etc...
			tz = pytz.timezone('Cuba')
			date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset)
			weekday = date.isoweekday() % 7
			#Read web site
			data = ''
			parser = Scraper(statecuba)
			#Cuba only has 7 days worth of data from today
			days = min(7-offset, maxdays+1)
			for i in range(days):
				if verbose > 0:
					print 'Reading url=%s?Dia=%d' % (cubaurl,weekday)
				try:
					handle = Web(cubaurl + '?Dia=' + `weekday`)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					parser.feed(data,ignoretag='br')
					weekday = (weekday + 1) % 7
			cubaresult = parser.processed
			parser.close()
			if not cubaresult:
				print >> sys.stderr, "Couldn't extract any content from Cuba"
				#raise Error("Couldn't extract any content from Cuba",404)
			else:
				cubaresult[:0] = ['date','%02d/%02d/%04d' % (date.day,date.month,date.year)]	
				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = cubaidname
				finishtab[-1]['result'] = cubaresult
				finishtab[-1]['lang'] = 'es'
				finishtab[-1]['name'] = cubachannelname
				finishtab[-1]['timezone'] = calc_timezone(date)
				finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring Cuba Vision'

		if grab_channel['bvn']:
			if verbose > 0:
				print 'Processing Dutch TV'
			tz = pytz.timezone('UTC')
			date1 = date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset)
			#Read 7 days maximum worth of programme, put maxdays+1 so we can get info about the last program of the day
			days = min(7, maxdays+1)
			result = []
			p = re.compile(r"{\s*title:\s*'(?P<title>.*?)',\s*time:\s*'(?P<time>.*?)',\s*description:\s*'(?P<desc>.*?)(<a.*>(?P<desc2>.*?)</a>(?P<desc3>.*?))*',",re.UNICODE)
			#Fetch one page per day; the URL is keyed by calendar date (YYYYMMDD)
			for i in range(days):
				url = '%s?date=%04d%02d%02d' % (bvnurl, date.year, date.month, date.day)
				if verbose > 0:
					print 'Reading url=%s' % url
				try:
					handle = Web(url)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					m = re.compile(r"var programs = new Array\((.*)\);",re.DOTALL).search(data)
					if m:
						token = p.finditer(m.group(1))
						for j in token:
							result.append('time')
							result.append(j.group('time'))
							result.append('title')
							result.append(j.group('title'))
							result.append('desc')
							result.append(j.group('desc') + ' ' + (j.group('desc2') != None and j.group('desc2') or '') +
											' ' + (j.group('desc3') != None and j.group('desc3') or '') )
					else:
						break
					date += datetime.timedelta(days=1)

			result = xmltv.scangenre(result)

			if not result:
				print >> sys.stderr, "Couldn't extract any content from Dutch TV"
				#raise Error("Couldn't extract any content from Dutch TV",408)
			else:
				result[:0] = ['date','%02d/%02d/%04d' % (date1.day,date1.month,date1.year)]

				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = bvnidname
				finishtab[-1]['result'] = result
				finishtab[-1]['lang'] = 'nl'
				finishtab[-1]['name'] = bvnchannelname
				finishtab[-1]['timezone'] = calc_timezone(date1)
				finishtab[-1]['dst'] = 0
				finishtab[-1]['encoding'] = 'utf-8'
		else:
			if verbose > 0:
				print 'Ignoring Dutch TV'

		if grab_channel['ertworld']:
			if verbose > 0:
				print 'Processing ERT World'
			tz = pytz.timezone('Europe/Athens')
			date1 = date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset-1)
			#Read 7 days maximum worth of programme, put maxdays+1 so we can get info about the last program of the day
			days = min(7, maxdays+1)
			parser = Scraper(stateertworld)
			#Calculate the day of the year
			for i in range(days):
				url = '%s?pday=%d' % (ertworldurl, (date - datetime.datetime(date.year,1,1,tzinfo=tz)).days + 1)
				if verbose > 0:
					print 'Reading url=%s' % url
				try:
					handle = Web(url)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					parser.feed(data,emptyfield=False)
				date += datetime.timedelta(days=1)

			ertworldresult = xmltv.scangenre(self.time_title(parser.processed))
			parser.close()

			if not ertworldresult:
				print >> sys.stderr, "Couldn't extract any content from ERT World"
				#raise Error("Couldn't extract any content from ERT World",408)
			else:
				ertworldresult[:0] = ['date','%02d/%02d/%04d' % (date1.day,date1.month,date1.year)]

				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = ertworldidname
				finishtab[-1]['result'] = ertworldresult
				finishtab[-1]['lang'] = 'gr'
				finishtab[-1]['name'] = ertworldchannelname
				finishtab[-1]['timezone'] = calc_timezone(date1)
				finishtab[-1]['dst'] = 0
				finishtab[-1]['encoding'] = 'windows-1253'
		else:
			if verbose > 0:
				print 'Ignoring ERT World'

		if grab_channel['duna']:
			if verbose > 0:
				print 'Processing Duna'
			#Generate URL
			tz = pytz.timezone('Europe/Budapest')
			date1 = date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset-1)
			#Read web site
			parser = Scraper(stateduna,completeclass=True)
			#Read at most 7 days worth of data for Duna (comment previously said TRT)
			days = min(7-offset, maxdays+1)
			counter = 1
			for i in range(days):
				url = '%s?nap=%04d-%02d-%02d&channel=4284' % (dunaurl, date.year, date.month, date.day)
				if verbose > 0:
					print 'Reading url=%s' % url
				try:
					handle = Web(url)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					parser.feed(data,ignoretag='br|p|b')
					date += datetime.timedelta(days=1)
			result = parser.processed
			parser.close()
			if not result:
				print >> sys.stderr, "Couldn't extract any content from Duna"
				#raise Error("Couldn't extract any content from Duna",404)
			else:
				result[:0] = ['date','%02d/%02d/%04d' % (date1.day,date1.month,date1.year)]	
				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = dunaidname
				finishtab[-1]['result'] = result
				finishtab[-1]['lang'] = 'hu'
				finishtab[-1]['name'] = dunachannelname
				finishtab[-1]['timezone'] = calc_timezone(date)
				finishtab[-1]['encoding'] = 'utf-8'
				finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring Duna'

		if grab_channel['tvri']:
			if verbose > 0:
				print 'Processing TVRI'
			tz = pytz.timezone('Asia/Jakarta')
			date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset)
			#Only today's data
			parser = Scraper(statetvri)
			url = tvriurl
			if verbose > 0:
				print 'Reading url=%s' % url
			try:
				handle = Web(url)
			except Error, e:
				print >> sys.stderr, 'Error:', e.code, e.message
			else:
				data = handle.read()
				handle.close()
				parser.feed(data,ignoretag='br')

			result = parser.processed
			parser.close()
			
			if not result:
				print >> sys.stderr, "Couldn't extract any content from TVRI"
				#raise Error("Couldn't extract any content from TVRI",414)
			else:
				result[:0] = ['date','%02d/%02d/%04d' % (date.day,date.month,date.year)]
				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = tvriidname
				finishtab[-1]['result'] = result
				finishtab[-1]['lang'] = 'id'
				finishtab[-1]['name'] = tvrichannelname
				finishtab[-1]['timezone'] = calc_timezone(date)
				finishtab[-1]['encoding'] = 'utf-8'
				finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring TVRI'

		if grab_channel['rtm']:
			if verbose > 0:
				print 'Processing Malaysian TV'
			#Calculate timezone in Kuala Lumpur
			tz = pytz.timezone('Asia/Kuala_Lumpur')
			date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset-1)
			#Read 8 days maximum worth of programme
			days = min(8-offset, maxdays+1)
			parser = Scraper(statertm)
			for i in range(days):
				url = '%s?date=%04d-%02d-%02d' % ( rtmurl , date.year, date.month, date.day)
				if verbose > 0: 
					print 'Reading url=%s' % url
				try:
					handle = Web(url)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					parser.feed(data,ignoretag='br')
				date += datetime.timedelta(days=1)

			rtmresult = parser.processed
			parser.close()
			if not rtmresult:
				print >> sys.stderr, "Couldn't extract any content from NewsAsia"
				#raise Error("Couldn't extract any content from Malaysian TV",409)
			else:
				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = rtmidname
				finishtab[-1]['result'] = rtmresult
				finishtab[-1]['lang'] = 'my'
				finishtab[-1]['name'] = rtmchannelname
				finishtab[-1]['timezone'] = calc_timezone(date)
				finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring Malaysian TV'

		if grab_channel['rt']:
			if verbose > 0:
				print 'Processing Russia Today'
			tz = pytz.timezone('UTC')
			date1 = date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset)
			#Read 7 days maximum worth of programme, put maxdays+1 so we can get info about the last program of the day
			days = min(7-offset, maxdays+1)
			parser = Scraper(staterussia)
			for i in range(days):
				url = '%s/%02d-%02d-%02d?tz=0' % (russiaurl, date.year, date.month, date.day)
				if verbose > 0:
					print 'Reading url=%s' % url
				try:
					handle = Web(url)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					parser.feed(data)
					date += datetime.timedelta(days=1)

			russiaresult = xmltv.scangenre(self.time_title(parser.processed))
			parser.close()
			
			if not russiaresult:
				print >> sys.stderr, "Couldn't extract any content from Russia Today"
				#raise Error("Couldn't extract any content from Russia Today",407)
			else:
				russiaresult[:0] = ['date','%02d/%02d/%04d' % (date1.day,date1.month,date1.year)]

				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = russiaidname
				finishtab[-1]['result'] = russiaresult
				finishtab[-1]['lang'] = 'ru'
				finishtab[-1]['name'] = russiachannelname
				finishtab[-1]['timezone'] = calc_timezone(date1)
				finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring Russia Today'

		if grab_channel['sctv']:
			if verbose > 0:
				print 'Processing SCTV'
			#Program on SCTV starts on Monday, so calculate the date of last Monday
			tz = pytz.timezone('Australia/Sydney')
			date = datetime.datetime.now(tz=tz) - datetime.timedelta(days=datetime.datetime.now(tz=tz).weekday())
			#SCTV has shocking html, so purely extract all possible text and run regular expression to extract the time and title
			parser = Scraper(statesctv)
			try:
				handle = Web(sctvurl)
			except Error, e:
				print >> sys.stderr, 'Error:', e.code, e.message
			else:
				data = handle.read()
				handle.close()
				parser.feed(data,emptyfield=False)
				sctvresult = parser.processed
				parser.close()
				textextract = ''
				for i in range (len(sctvresult)/2):
					if sctvresult[i*2] == 'title':
						textextract += sctvresult[i*2+1] + '\n'

				p = re.compile(r'([0-9]{2}:[0-9]{2}[ap]m)\s+-\s+(.*)\s+([0-9]{2}:[0-9]{2}[ap]m\s+-\s+[\s\S]*)')
				sctvresult2 = []
				finished = False
				while not finished:
					m = p.search(textextract)
					if m:
						sctvresult2.append('time')
						sctvresult2.append(m.group(1))
						sctvresult2.append('title')
						sctvresult2.append(m.group(2))
						textextract = m.group(3)
					else:
						finished = True
				#Retrieved last entry
				p = re.compile(r'([0-9]{2}:[0-9]{2}[ap]m)\s+-\s+(.*)')
				m = p.search(textextract)
				if m:
					sctvresult2.append('time')
					sctvresult2.append(m.group(1))
					sctvresult2.append('title')
					sctvresult2.append(m.group(2))

				if not sctvresult2:
					print >> sys.stderr, "Couldn't extract any content from SCTV"
					#raise Error("Couldn't extract any content from SCTV",405)
				else:
					sctvresult2[:0] = ['date','%02d/%02d/%04d' % (date.day,date.month,date.year)]

					finishtab.append(copy.deepcopy(finishtemplate))
					finishtab[-1]['id'] = sctvidname
					finishtab[-1]['result'] = sctvresult2
					finishtab[-1]['lang'] = 'en'
					finishtab[-1]['name'] = sctvchannelname
					finishtab[-1]['timezone'] = calc_timezone(date)
					finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring SCTV'

		if grab_channel['tv5']:
			#Process TV5 data
			if verbose > 0:
				print 'Processing TV5'
			#Set cookie for TV5
			cj = cookielib.CookieJar()
			for i in cookietv5:
				if conf.config.has_key(cookietv5[i]):
					 cookietv5[i] = `time_zone[conf.config[cookietv5[i]]][2]`
			for i in cookietv5:
				cookie = cookielib.Cookie(0,i,cookietv5[i],None,False,tv5mainurl,True,True,'/', True, False, maxtv5cookie, False, None, None, {} )
				cj.set_cookie(cookie)
			opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
			urllib2.install_opener(opener)
			try:
				handle = Web(tv5url)
			except Error, e:
				print >> sys.stderr, 'Error:', e.code, e.message
			else:
				data = handle.read()
				handle.close()
				if verbose > 0:
					print 'Done grabbing data.. processing'

				tz = pytz.timezone('Australia/Melbourne')
				date = datetime.datetime.now(tz=tz)

				parser = Scraper(statetv5)
				parser.feed(data,ignoretag='br')
				if not parser.processed:
					print >> sys.stderr, "Couldn't extract any content from TV5"
					#raise Error("Couldn't extract any content from TV5",400)
				else:
					#Add it to the processed table
					finishtab.append(copy.deepcopy(finishtemplate))
					finishtab[-1]['id'] = tv5idname
					finishtab[-1]['result'] = parser.processed
					finishtab[-1]['lang'] = 'fr'
					finishtab[-1]['name'] = tv5channelname
					finishtab[-1]['timezone'] = calc_timezone(date)
					finishtab[-1]['dst'] = dst
					parser.close()
		else:
			if verbose > 0:
				print 'Ignoring TV5'

		if grab_channel['voa']:
			if verbose > 0:
				print 'Processing Voice of America'
			tz = pytz.timezone('UTC')
			date1 = date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset)
			#Create parser instance
			parser = Scraper(statevoa)

			#Read 8 days maximum worth of programme
			days = min(8, maxdays+1)
			for i in range(days):
				data = { 'requestdate' : '%02d%02d%02d' % (date.month, date.day, date.year - 2000), 'satellite' : 'AS', 'type' : 'full'}
				try:
					handle = Web(voaurl, data=data)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					#Feed the data to the parser
					parser.feed(data,emptyfield=False,ignoretag='br')
				date += datetime.timedelta(days=1)

			voaresult = parser.processed
			parser.close()
			if not voaresult:
				print >> sys.stderr, "Couldn't extract any content from Voice of America"
				#raise Error("Couldn't extract any content from Voice of America",410)
			else:
				voaresult2 = []
				status = 0
				for i in range(len(voaresult)/2):
					x = voaresult[i*2]
					y = voaresult[i*2+1]
					if i*2+3 < len(voaresult):
						x2 = voaresult[i*2+2]
						y2 = voaresult[i*2+3]
					if i*2+5 < len(voaresult):
						x3 = voaresult[i*2+4]
						y3 = voaresult[i*2+5]
					else:
						x3 = y3 = ''
					if verbose > 1:
						print 'x',x,'y',y,'x2',x2,'y2',y2
					if x == 'data' and x2 == 'data2':
						if y == 'Time:':
							voaresult2.append('time')
							voaresult2.append(re.compile(r'\s*([0-9]{2}:[0-9]{2})').match(y2).group(1))
							status = 0
						elif y == 'Title:':
							voaresult2.append('title')
							voaresult2.append(y2)
						elif y == 'Episode:':
							voaresult2.append('subtitle')
							voaresult2.append(y2)
						elif y == 'Language:':
							status = 1
							lang = y2
						elif y == 'Description:':
							voaresult2.append('desc')
							if status == 1:
								voaresult2.append(y2 + '\nLanguage: ' + lang)
							else:
								voaresult2.append(y2)

					elif x == 'title' and x2 == 'title2' and x3 == 'title3':
						voaresult2.append('title')
						voaresult2.append(y2+y3)
					elif x == 'title' and x2 == 'title2':
						voaresult2.append('title')
						voaresult2.append(y2)
						
				voaresult2[:0] = ['date','%02d/%02d/%04d' % (date1.day,date1.month,date1.year)]
				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = voaidname
				finishtab[-1]['result'] = voaresult2
				finishtab[-1]['lang'] = 'en'
				finishtab[-1]['name'] = voachannelname
				finishtab[-1]['timezone'] = calc_timezone(date1)
				finishtab[-1]['dst'] = 0
				finishtab[-1]['encoding'] = 'utf-8'
		else:
			if verbose > 0:
				print 'Ignoring Voice of America'

		if grab_channel['thaitv5']:
			if verbose > 0:
				print 'Processing Thai TV5'
			tz = pytz.timezone('Asia/Bangkok')
			date1 = date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset)
			#Read 7 days maximum worth of programme, put maxdays+1 so we can get info about the last program of the day
			days = min(7, maxdays+1)
			parser = Scraper(statethaitv5, completeclass=True)
			for i in range(days):
				url = '%s?date=%04d%02d%02d' % (thaitv5url, date.year, date.month, date.day)
				if verbose > 0:
					print 'Reading url=%s' % url
				try:
					handle = Web(url)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					parser.feed(data,ignoretag='br')
					date += datetime.timedelta(days=1)

			result = self.time_title(parser.processed)
			parser.close()
			
			if not result:
				print >> sys.stderr, "Couldn't extract any content from Thai TV5"
				#raise Error("Couldn't extract any content from Thai TV5",413)
			else:
				result[:0] = ['date','%02d/%02d/%04d' % (date1.day,date1.month,date1.year)]
				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = thaitv5idname
				finishtab[-1]['result'] = result
				finishtab[-1]['lang'] = 'th'
				finishtab[-1]['name'] = thaitv5channelname
				finishtab[-1]['timezone'] = calc_timezone(date1)
				finishtab[-1]['encoding'] = 'tis-620'
				finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring Thai TV5'

		if grab_channel['vtv4']:
			if verbose > 0:
				print 'Processing VTV4'
			tz = pytz.timezone('Etc/GMT-7')
			date1 = date = datetime.datetime.now(tz=tz) + datetime.timedelta(days=offset)
			#Read 6 days maximum worth of programme, put maxdays+1 so we can get info about the last program of the day
			days = min(6, maxdays+1)
			parser = Scraper(statevtv4, completeclass=True)
			for i in range(days):
				url = '%s/%04d/%d/%d' % (vtv4url, date.year, date.month, date.day)
				if verbose > 0:
					print 'Reading url=%s' % url
				try:
					handle = Web(url)
				except Error, e:
					print >> sys.stderr, 'Error:', e.code, e.message
				else:
					data = handle.read()
					handle.close()
					parser.feed(data,ignoretag='br')
					date += datetime.timedelta(days=1)

			result = self.time_title(parser.processed)
			parser.close()
			
			if not result:
				print >> sys.stderr, "Couldn't extract any content from VTV4"
				#raise Error("Couldn't extract any content from VTV4",414)
			else:
				result[:0] = ['date','%02d/%02d/%04d' % (date1.day,date1.month,date1.year)]
				finishtab.append(copy.deepcopy(finishtemplate))
				finishtab[-1]['id'] = vtv4idname
				finishtab[-1]['result'] = result
				finishtab[-1]['lang'] = 'vn'
				finishtab[-1]['name'] = vtv4channelname
				finishtab[-1]['timezone'] = calc_timezone(date1)
				finishtab[-1]['encoding'] = 'utf-8'
				finishtab[-1]['dst'] = 0
		else:
			if verbose > 0:
				print 'Ignoring VTV4'

		#Read Nasa XMLTV from TPG
		docnasa = False
		if grab_channel['tpgnasa']:
			if verbose > 0:
				print 'Processing TPG NASA channel'
			tz = pytz.timezone('UTC')
			try:
				handle = Web(tpgnasaurl)
			except Error, e:
				print >> sys.stderr, 'Error:', e.code, e.message
			else:
				data = handle.read()
				handle.close()
				docnasa = xmltv.filterxmltv(data,0,offset=offset, days=maxdays, removepattern = [r'(\S+?)\.tpg\.com\.au', r'tpg.\1']) 
		
		#Read XMLTV from TPG
		doctpg = False
		if grab_channel['tpg']:
			if verbose > 0:
				print 'Processing TPG channels'
			tz = pytz.timezone('UTC')
			try:
				handle = Web(tpgurl)
			except Error, e:
				print >> sys.stderr, 'Error:', e.code, e.message
			else:
				data = handle.read()
				handle.close()
				doctpg = xmltv.filterxmltv(data,0,offset=offset, days=maxdays, removepattern = [r'(\S+?)\.tpg\.com\.au', r'tpg.\1']) 
		
		#Generate XML
		for x in finishtab:
			xmltv.addchannel(x['name'], idprefix + x['id'], lang=x['lang'])
		if docnasa:
			xmltv.mergechannels(docnasa)
		if doctpg:
			xmltv.mergechannels(doctpg)
		for x in finishtab:
			if verbose > 0:
				print 'addqueue of', idprefix + x['id'], offset, maxdays, x['dst'], x['timezone'], x['marker'], x['encoding']
			xmltv.addqueueprogramme(x['result'], idprefix + x['id'], offset=offset, days=maxdays, dst=x['dst'], timezone=x['timezone'], marker=x['marker'], encoding=x['encoding'])
		if docnasa:
			xmltv.mergeprogrammes(docnasa)
		if doctpg:
			xmltv.mergeprogrammes(doctpg)

		# Print/Write our newly created XML
		if outfilename:
			output2 = open(outfilename,'w')
		if converttz:
			doc = xmltv.adjusttimezone(xmltv.doc,timezone)
			if pretty:
				output2.write(doc.toprettyxml(indent="  ", encoding="utf-8"))
			else:
				output2.write(doc.toxml(encoding="utf-8"))
		else:
			if pretty:
				output2.write(xmltv.toprettyxml())
			else:
				output2.write(xmltv.toxml())
		if outfilename:
			output2.close()

		return xmltv


if __name__ == '__main__':

	try:
		opts, args = getopt.getopt(sys.argv[1:], \
			"hvo:d:c:", ["help", "version", "quiet", "verbose", "days=", "pretty", "output=", "configure", "capabilities", "offset=", "config-file=", "preferredmethod", "description"])
	except getopt.GetoptError:
		# print help information and exit:
		print "Unrecognised option: "
		usage()
		sys.exit(2)

	output = sys.stdout
	pretty = False
	inconfigure = 0
	conf = None
	for o, a in opts:
		if o in ("-h", "--help"):
			usage()
			sys.exit(0)
		elif o == "--version":
			print >> sys.stderr, NAME, VERSION
			sys.exit(0)
		elif o == "--capabilities":
			print CAPABILITIES
			sys.exit(0)
		elif o == "--pretty":
			pretty = True
		elif o == "--preferredmethod":
			print PREFERREDMETHOD
			sys.exit(0)
		elif o == "--description":
			print DESCRIPTION
			sys.exit(0)
		elif o == "--quiet":
			verbose -= 50
		elif o == "--verbose":
			verbose += 1
		elif o in ("-d", "--days"):
			maxdays = int(a)
			if maxdays < 1:
				print >> sys.stderr, "invalid number of days"
				sys.exit(2)
		elif o == "--offset":
			day_offset = int(a)
		elif o in ("-o", "--output"):
			output = open(a,'w')
		elif o == "--configure":
			inconfigure = 1
		elif o in ("-c", "--config-file"):
			config_file = a

	conf = Config(config_file, inconfigure)
	
	if inconfigure:
		conf.initconfig()
		conf.write()
		sys.exit(0)

	if conf.exists == False:
		print >> sys.stderr, "Config file missing run with --configure"
		sys.exit(2)

	#Read config
	localtimezone =  time_zone[conf.config['TimeZone']][0]
	dst = time_zone[conf.config['TimeZone']][1]

	tv_grab=TVGRAB()
	try:
		tv_grab.main(conf,output=output, pretty=pretty,converttz=True,timezone=localtimezone,maxdays=maxdays,offset=day_offset)
	except Error, e:
		print >> sys.stderr, e.message
		print >> sys.stderr, 'Error:', e.code
		sys.exit(2)

	if output != sys.stdout:
		output.close()

#################################################################
# History
# 24/05/2009: version 0.4.10
# 24/05/2009: Fixed Al Jazeera timing, times were shifted by one program. 
# 18/05/2009: version 0.4.9
# 18/05/2009: Work-around Bloomberg broken time in some entries
# 19/03/2009: version 0.4.8
# 19/03/2009: Updated TRT International (change of channel number)
# 10/03/2009: version 0.4.7
# 10/03/2009: Updated for Bloomberg new format
# 24/11/2008: version 0.4.6
# 24/11/2008: Updated TRT International change of URL and format
# 08/11/2008: version 0.4.5
# 08/11/2008: Work around SCTV not using valid times
# 05/11/2008: version 0.4.4
# 05/11/2008: Remove DST calculations for the time being
# 05/10/2008: version 0.4.3
# 05/10/2008: Added NASA TV
# 11/06/2008: version 0.4.2
# 11/06/2008: Updated World of Fashion
# 11/05/2008: version 0.4.0
# 11/05/2008: Updated grabber for DW, Duna, BVN, TRT International. Grabbing from original web site instead
# 07/05/2008: version 0.3.1
# 07/05/2008: Changed User Agent String to look like we are using IE7 on Windows XP
# 05/05/2008: version 0.3.0
# 05/05/2008: Add CCTV channels, clean up html for TVE
# 01/05/2008: version 0.2.0
# 01/05/2008: Add TVRI, World of Fashion. Re-order channels to be similar to TPG's listing
# 01/05/2008: Add Thai TV5, VTV4
# 30/04/2008: version 0.1.7
# 30/04/2008: Add Eurosport, Eurosport News and Bloomberg
# 29/04/2008: version 0.1.6
# 29/04/2008: Convert date/time to local timezone
# 26/04/2008: version 0.1.5
# 26/04/2008: TV5 updated, add movie classic, playboy tv and adult one
# 19/04/2008: version 0.1.4
# 19/04/2008: Do not stop if a channel can't be processed
# 18/04/2008: version 0.1.3
# 18/04/2008: Added Voice of America
# 16/04/2008: version 0.1.2
# 16/04/2008: Add conversion of html escape code to utf-8. Added support for different encoding than iso8859-1. Added Greek TV and Malaysian TV
# 15/04/2008: version 0.1.1
# 15/04/2008: Add automatic category
# 15/04/2008: version 0.1.0
# 15/04/2008: Added another work around for TVE, sometimes they have midnight showing like 24:00:00
# 13/04/2008: version 0.0.9
# 13/04/2008: Added SCTV, AsiaNews, Russia Today
# 12/04/2008: version 0.0.8
# 12/04/2008: Added Cuba Vision and SCTV
# 11/04/2008: version 0.0.7
# 10/04/2008: Added cctv4, bvn, duna, trtint
# 09/04/2008: version 0.0.6
# 09/04/2008: Add support for lynsat. Added TVE
# 08/04/2008: version 0.0.5
# 08/04/2008: Add disclaimer notice. Add TVE
# 03/04/2008: version 0.0.4
# 03/04/2008: Rewrote xml generator. Add Al Jazeera
# 02/04/2008: Use python datetime module to perform time calculation
# 01/04/2008: version 0.0.3
# 31/03/2008: Rewrite of the scraper engine to use an automated state table. Will ease the addition of other channels
# 28/03/2008: add handling of DST and support of timezone
# 26/03/2008: 0.0.1 : First version
