#!/usr/bin/env python
import sys
from pyparsing import *
# pyparsing documentation:
# https://sourceforge.net/p/pyparsing/code/HEAD/tree/trunk/src/HowToUsePyparsing.txt#l302
# Names of the fields looked up on every parsed input specification.
# assign_missing_fields() uses this set to work out which ones were omitted.
STREAM_FIELDS = {
    "type",
    "filename",
    "num",
}
# see http://stackoverflow.com/questions/11180622/optional-string-segment-in-pyparsing
def assign_missing_fields(fields):
    """Fill in missing optional field values (filename, num).

    Used as a pyparsing parse action: *fields* is the results object for
    one input specification and is modified in place (nothing is
    returned). Every name in ``STREAM_FIELDS`` that is not already present
    receives a default: ``0`` for ``"num"`` and ``""`` for anything else.
    """
    missing = STREAM_FIELDS - set(fields.keys())
    # Walk the missing names in sorted order so the defaults are always
    # appended in the same sequence (filename before num). Iterating the
    # bare set would leave the positional order up to string hashing.
    for name in sorted(missing):
        default = 0 if name == "num" else ""
        # see http://pyparsing.wikispaces.com/share/view/71042464
        # The item assignment registers the value under its name; the
        # append adds it to the positional token list as well.
        fields[name] = default
        fields.append(default)
def parser_bnf():
    """Grammar for parsing podcast configuration files.

    Builds a new pyparsing grammar on every call and returns its top-level
    expression, ``config``, which matches zero or more stream
    specifications and ignores Python-style ``#`` comments.

    Each stream specification is a group holding the named results
    ``type``, ``filename`` and ``num`` (missing ones are filled in by the
    ``assign_missing_fields`` parse action) and ``times``, a group with
    the optional time specifications. Timestamps are groups with the named
    integer results ``hh``, ``mm``, ``ss`` and, only when a fractional
    part was written, ``ms``.
    """
    # Punctuation. Everything except "^" is suppressed, i.e. matched but
    # left out of the parse results.
    at = Literal("@").suppress()
    caret = Literal("^")
    colon = Literal(":").suppress()
    left_bracket = Literal("[").suppress()
    period = Literal(".").suppress()
    right_bracket = Literal("]").suppress()

    # zero_index ::= [0-9]+
    # The matched digits are converted to an int.
    zero_index = Word(nums).setParseAction(lambda s, loc, toks: int(toks[0]))

    # filename ::= [A-Za-z0-9][-A-Za-z0-9._ ]*
    # Combine glues the first character and the optional rest back into a
    # single token.
    filename_first = Word(alphanums, exact=1)
    filename_rest = Word(alphanums + "-_. ")
    filename = Combine(filename_first + Optional(filename_rest))

    # millisecs ::= "." [0-9]+
    # Only the first three digits are kept and shorter values are padded
    # on the right with zeros, so ".5" becomes 500 and ".1234" becomes 123.
    millisecs = (period +
                 (Word(nums).setParseAction(
                     lambda s, loc, toks: int(toks[0][:3].ljust(3, "0")))
                  .setResultsName("ms")))

    # hours, minutes, seconds ::= zero_index
    hours = zero_index.setResultsName("hh")
    minutes = zero_index.setResultsName("mm")
    seconds = zero_index.setResultsName("ss")

    # timestamp ::= hours ":" minutes ":" seconds [millisecs]
    timestamp = Group(hours + colon + minutes + colon + seconds +
                      Optional(millisecs))

    # duration_file ::= "@", filename
    # We need a separate item for a lonely duration file timestamp so
    # that we can attach a parse action just to the lonely case. Using
    # duration_file alone means the parse action is attached to all
    # instances of duration_file.
    duration_file = at + filename
    lonely_duration_file = at + filename

    # timespecs ::= duration_file
    #             | timestamp duration_file
    #             | timestamp {timestamp}
    # If duration_file timestamp is lonely, prepend a zero timestamp.
    timespecs = Or(
        [lonely_duration_file.setParseAction(
            lambda s, loc, toks:
                timestamp.parseString("00:00:00.000") + toks),
         timestamp + duration_file,
         OneOrMore(timestamp)])

    # last_frame ::= "-1" | "last"
    # Both spellings are replaced by the integer -1.
    last_frame = oneOf(["-1", "last"]).setParseAction(replaceWith(-1))

    # frame_number ::= ":" (zero_index | last_frame)
    frame_number = colon + Or([zero_index, last_frame]).setResultsName("num")

    # stream_number ::= ":" zero_index
    stream_number = colon + zero_index.setResultsName("num")

    # input_file ::= ":" [filename]
    input_file = colon + Optional(filename).setResultsName("filename")

    # previous_segment ::= ":" "^"
    # The caret itself is stored as the "filename" result.
    previous_segment = colon + caret.setResultsName("filename")

    # frame_input_file ::= input_file | previous_segment
    frame_input_file = Or([input_file, previous_segment])

    # frame_type ::= "frame" | "f"
    # Normalised to the long form "frame".
    frame_type = oneOf(["f", "frame"]).setParseAction(replaceWith("frame"))

    # frame_input ::= frame_type [frame_input_file [frame_number]]
    frame_input = (frame_type.setResultsName("type") +
                   Optional(frame_input_file +
                            Optional(frame_number)))

    # video_type ::= "video" | "v"
    # Normalised to the long form "video".
    video_type = oneOf(["v", "video"]).setParseAction(replaceWith("video"))

    # video_input ::= video_type [input_file [stream_number]]
    video_input = (video_type.setResultsName("type") +
                   Optional(input_file +
                            Optional(stream_number)))

    # audio_type ::= "audio" | "a"
    # Normalised to the long form "audio".
    audio_type = oneOf(["a", "audio"]).setParseAction(replaceWith("audio"))

    # audio_input ::= audio_type [input_file [stream_number]]
    audio_input = (audio_type.setResultsName("type") +
                   Optional(input_file + Optional(stream_number)))

    # audio_or_video_input ::= audio_input | video_input
    audio_or_video_input = Or([audio_input, video_input])

    # input ::= audio_or_video_input | frame_input
    # inputspec ::= "[" input {":" input} "]"
    # The parse action supplies defaults for any of the named fields
    # (filename, num) that were not written.
    inputspec = (left_bracket +
                 delimitedList(
                     Or([audio_or_video_input, frame_input]), delim=":")
                 .setParseAction(assign_missing_fields) +
                 right_bracket)

    # streamspec ::= inputspec [timespecs]
    # "times" is always present; it is an empty group when no time
    # specification follows the input specification.
    streamspec = Group(inputspec +
                       Group(Optional(timespecs)).setResultsName("times"))

    # config ::= {streamspec}
    config = ZeroOrMore(streamspec)
    config.ignore(pythonStyleComment)
    return config
def parse_configuration_file(config_file):
    """Parse the podcast configuration file *config_file*.

    A new grammar is built with ``parser_bnf()`` and applied through its
    ``parseFile`` method with ``parseAll=True``; the value returned by
    ``parseFile`` is passed back unchanged.
    """
    return parser_bnf().parseFile(config_file, parseAll=True)
def parse_configuration_string(config_string):
    """Parse podcast configuration text held in *config_string*.

    A new grammar is built with ``parser_bnf()`` and applied through its
    ``parseString`` method with ``parseAll=True``; the value returned by
    ``parseString`` is passed back unchanged.
    """
    return parser_bnf().parseString(config_string, parseAll=True)
def test_parser(config_dir="/Users/nstanger/tmp",
                tests=("config1.txt", "config2.txt", "config3.txt",
                       "config4.txt")):
    """Parse some sample configuration files and print what was found.

    This is a manual smoke check, not an automated test: nothing is
    asserted, the results are only written to standard output.

    config_dir -- directory containing the sample files.
    tests      -- names of the files inside config_dir to parse, in order.

    Called without arguments it processes the same four files from the
    same directory as before the parameters were added.
    """
    time_format = "{hh:02d}:{mm:02d}:{ss:02d}.{ms:03d}"
    # Single-argument print(...) calls behave identically under the
    # Python 2 print statement and the Python 3 print function.
    for test_file in tests:
        print("==={f}===".format(f=test_file))
        result = parse_configuration_file("/".join([config_dir, test_file]))
        for spec in result:
            print(spec)
            print(" type = {t}".format(t=spec["type"]))
            print(" filename = '{f}'".format(f=spec["filename"]))
            print(" num = {n}".format(n=spec["num"]))
            print(" times = {t}".format(t=spec["times"]))
            for i, t in enumerate(spec["times"]):
                if isinstance(t, str):
                    # A bare string is the name of a duration file.
                    print(" punch out after duration of '{f}'".format(f=t))
                elif isinstance(t, ParseResults):
                    # Timestamps alternate: even positions punch in, odd
                    # positions punch out.
                    direction = "in" if i % 2 == 0 else "out"
                    # The milliseconds part is optional in the grammar, so
                    # "ms" may be absent; treat that as zero instead of
                    # failing with a KeyError.
                    stamp = time_format.format(
                        hh=t["hh"], mm=t["mm"], ss=t["ss"],
                        ms=t.get("ms", 0))
                    print(" punch {d} at: {ts}".format(d=direction, ts=stamp))
        # Blank line after each file's output. print("") is used because
        # a bare print() would output "()" under Python 2.
        print("")
# Run the manual parser check only when this file is executed as a script.
if __name__ == "__main__":
    test_parser()