Python - Scripts

From XennisWiki
Jump to: navigation, search

This article contains some Python scripts.

Date and time

Convert string to date / timestamp

Pattern: strftime() and strptime() Behavior

Convert a string to date

import time

# Example time: 24.05.2011, 01:00
# Pattern "%Y%m%d%H": four-digit year, month, day of month, hour (24-hour clock).
time.strptime("2011052401", "%Y%m%d%H")

Convert a date to timestamp

def string2timestamp(date_string, format):
    """Convert a date string to a Unix timestamp.

    The string is parsed with ``time.strptime`` and converted with
    ``time.mktime``, which interprets the parsed value as local time.

    :param date_string: Date as string, laid out as described by ``format``
    :param format: Format of the given date string (e.g. "%Y%m%d%H")
    :return: Timestamp (in whole seconds)
    :raises ValueError: if ``date_string`` does not match ``format``
    """
    import time  # local import keeps the snippet usable on its own

    parsed = time.strptime(date_string, format)
    # mktime() returns a float; int() gets rid of the trailing .0
    return int(time.mktime(parsed))

Datetime util

def string_to_date(date_string, format="%Y-%m-%dT%H:%M"):
    """Parse ``date_string`` according to ``format`` and return a datetime.

    :param date_string: Textual date, e.g. "2011-05-24T01:00"
    :param format: strptime pattern describing ``date_string``
    :return: The parsed ``datetime.datetime`` object
    """
    import datetime as _datetime

    parsed = _datetime.datetime.strptime(date_string, format)
    return parsed

Files

Open and read files

Read a text file

def readFile(filename):
    """Return the complete content of the text file ``filename``.

    :param filename: Path of the file to read
    :return: The file content as a single string
    """
    with open(filename) as text_file:
        # To process the file line by line instead, iterate over text_file.
        content = text_file.read()
    return content

Read a file as UTF-8

Further information: codecs — Codec registry and base classes

def readFile(filename, encoding="utf-8"):
    """Return the decoded content of ``filename``.

    :param filename: Path of the file to read
    :param encoding: Name of the codec used to decode the file
    :return: The whole file content as one decoded string
    """
    import codecs

    reader = codecs.open(filename, encoding=encoding)
    with reader:
        decoded = reader.read()
    return decoded

Read a text or zipped file

def readFile(filename):
    """Return the content of a plain text file or a gzip-compressed file.

    A file whose name ends in ``.gz`` is opened with ``gzip.open`` in binary
    mode ('rb'); every other file is opened with the built-in ``open`` in its
    default text mode.

    :param filename: Path of the file to read
    :return: The whole file content (read in binary mode for ``.gz`` files)
    """
    import gzip
    import os

    # Only the extension is needed; the full filename must be used for opening.
    _, extension = os.path.splitext(filename)
    if extension == '.gz':
        source = gzip.open(filename, 'rb')
    else:
        source = open(filename)
    # The context manager guarantees the handle is closed after reading.
    with source:
        return source.read()

Read huge XML files

Further information: High-performance XML parsing in Python with lxml

Install lxml (Note: the machine should have more than 500 MB RAM to build lxml)

sudo apt-get install python-dev libxml2-dev libxslt1-dev zlib1g-dev
sudo pip install lxml
Read huge file
#!/usr/bin/python
# -*- coding: utf-8 -*-
from lxml import etree

def _fast_iter(self, filename):
    """Stream through an XML file and print the text of every 'example' element.

    Each element is cleared right after it has been handled, and its already
    processed preceding siblings are deleted, so that the tree built by
    ``iterparse`` does not keep growing while a huge file is read.

    :param self: Not used by this function; kept so existing callers keep working
    :param filename: Path of the XML file to parse
    """
    context = etree.iterparse(filename, load_dtd=True, html=False)

    # Only the element of each (event, element) pair is needed.
    for _, elem in context:
        if elem.tag == 'example':
            print(elem.text)

        elem.clear()
        # Delete now-empty references from the root
        while elem.getprevious() is not None:
            del elem.getparent()[0]
    del context

Write files

Write a text file

def writeFile(filename, content, mode="w+"):
    """Write ``content`` to ``filename``.

    :param filename: Path of the file to write
    :param content: Text that is written to the file
    :param mode: Mode passed to ``open``; the default "w+" creates the file
        if necessary and truncates it
    """
    with open(filename, mode=mode) as out_file:
        out_file.write(content)

# Example: write "Hello World" followed by a newline to myDir/myFile.txt.
# NOTE(review): presumably the directory myDir must already exist - confirm.
writeFile("myDir/myFile.txt", "Hello World\n")

Write a CSV file

#!/usr/bin/python
# -*- coding: utf-8 -*-
import codecs

class CsvFileWriter:
    """Minimal writer for tab-separated text files.

    A row is started with ``write_first_element`` (which writes a newline
    followed by the value) and continued with ``write_element`` (which writes
    a tab followed by the value). The writer can be used as a context manager
    so the file is closed even if an error occurs while writing.
    """

    ENCODING = 'utf-8'

    def __init__(self, filename):
        """Open ``filename`` for writing with mode 'w+' and ENCODING.

        :param filename: Path of the output file
        :raises IOError: if the file cannot be opened
        """
        # An IOError raised by codecs.open() simply propagates to the caller.
        self.output = codecs.open(filename, 'w+', self.ENCODING)

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file; exceptions from the ``with`` body are not suppressed."""
        self.close()
        return False

    def close(self):
        """Close the underlying output file."""
        self.output.close()

    def write_element(self, value=''):
        """Write a tab followed by ``value`` (an empty value by default)."""
        # The one-element tuple keeps '%' formatting correct for tuple values.
        self.output.write('\t%s' % (value,))

    def write_first_element(self, value):
        """Start a new line and write ``value`` as its first element."""
        self.output.write('\n%s' % (value,))

Write XML file

#!/usr/bin/python
# -*- coding: utf-8 -*-
import codecs


class XmlFileWriter:
    """Small helper that writes tab-indented XML to a file.

    The writer starts without an output file; ``open`` must be called before
    anything is written (writes without an output file are ignored). It can
    be used as a context manager so the file is closed on exit.

    NOTE(review): tag names and values are written as given; no XML escaping
    is applied, so callers presumably pass already-safe text - confirm.
    """

    ENCODING = 'utf-8'

    def __init__(self):
        self.output = None  # file object, set by open()
        self.tabbing = ''   # current indentation, one '\t' per open tag

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file; exceptions from the ``with`` body are not suppressed."""
        self.close()
        return False

    def write(self, line):
        """Write ``line`` unchanged; does nothing if no file has been opened."""
        if self.output is not None:
            self.output.write(line)

    def _write_line(self, line):
        """Write ``line`` with the current indentation and a trailing newline."""
        self.write('%s%s\n' % (self.tabbing, line))

    def write_start_tag(self, tag):
        """Write ``<tag>`` and indent everything that follows by one tab."""
        self._write_line('<%s>' % (tag,))
        self.tabbing += '\t'  # add one tab

    def write_close_tag(self, tag):
        """Remove one indentation level and write ``</tag>``."""
        # remove one tab (\t); a tab is one character long
        self.tabbing = self.tabbing[1:]
        self._write_line('</%s>' % (tag,))

    def write_element(self, tag, value, is_cdata=False):
        """Write ``<tag>value</tag>`` on one line.

        Nothing is written when ``value`` is falsy (e.g. None or '').

        :param tag: Element name
        :param value: Element content
        :param is_cdata: Wrap the value in a CDATA section if True
        """
        if not value:
            return
        if is_cdata:
            value = '<![CDATA[%s]]>' % (value,)
        self._write_line('<%s>%s</%s>' % (tag, value, tag))

    def write_declaration(self):
        """Write the XML declaration naming ENCODING as the encoding."""
        self._write_line('<?xml version="1.0" encoding="{}"?>'.format(self.ENCODING))

    def open(self, filename):
        """Open ``filename`` for writing and reset the indentation.

        A file opened by an earlier call is closed once the new one has been
        opened successfully, so it is not leaked.

        :param filename: Path of the output file
        :raises IOError: if the file cannot be opened
        """
        # Open first: if this fails, the writer's state stays untouched.
        new_output = codecs.open(filename, 'w+', self.ENCODING)  # may just 'w'?
        self.close()
        self.output = new_output
        self.tabbing = ''  # reset tabbing for new file

    def close(self):
        """Close the output file if one has been opened."""
        if self.output is not None:
            self.output.close()

Copy files

def copyFiles(source_dir, files, dest_dir):
    """Copy all files in ``source_dir`` that match a glob pattern.

    :param source_dir: Directory that is searched for files
    :param files: Glob pattern for the file names, e.g. "*.txt"
    :param dest_dir: Destination passed to ``shutil.copy`` for every match
    """
    import glob
    import os  # needed for os.path.join below
    import shutil

    pattern = os.path.join(source_dir, files)
    for filename in glob.glob(pattern):
        shutil.copy(filename, dest_dir)

# Example: copy every file matching *.txt from example/path to example/new.
copyFiles("example/path", "*.txt", "example/new")

List files in directories

import os

# Names of all entries in the directory myDir
dirs = os.listdir('myDir')
# print() with a single argument works on both Python 2 and Python 3.
print(dirs)

List specific files in a directory

All files with the extension .txt in the directory myDir.

def listFiles(dir, fileExtension):
    """Return the names of the entries in ``dir`` that end with ``fileExtension``.

    :param dir: Directory to list
    :param fileExtension: Required suffix of the names, e.g. ".txt"
    :return: List of matching names (without the directory part)
    """
    import os

    entries = os.listdir(dir)
    return [name for name in entries if name.endswith(fileExtension)]

# Example: print the names of all .txt files in myDir.
# print() with a single argument works on both Python 2 and Python 3.
print(listFiles("myDir", ".txt"))

Download a file

Download a file as stream with a progress bar

def download_file(url, path):
    """Download ``url`` as a stream into the directory ``path``.

    The local file name is the last segment of the URL. A progress bar is
    shown while downloading when the response has a Content-Length header;
    without that header the download runs without a progress bar.

    :param url: URL of the file to download
    :param path: Directory the file is stored in (with or without a trailing
        separator)
    :return: Path of the downloaded local file
    """
    import os

    # os.path.join also works when path has no trailing separator.
    local_filename = os.path.join(path, url.split('/')[-1])
    r = requests.get(url, stream=True)
    try:
        chunks = r.iter_content(chunk_size=1024)
        content_length = r.headers.get('content-length')
        if content_length is not None:
            # Integer division keeps expected_size an int on Python 3 as well.
            expected_chunks = int(content_length) // 1024 + 1
            chunks = progress.bar(chunks, expected_size=expected_chunks)
        with open(local_filename, 'wb') as f:
            print("Downloading %s" % local_filename)
            for chunk in chunks:
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    finally:
        # Release the connection of the streamed response.
        r.close()
    return local_filename

See also