# Question 1
# Count the whitespace-separated words in a record line.
# str.split() with no argument splits on any run of whitespace.
line1 = 'QR57613 1.3 Serpentes Pythonidae'
parts = line1.split()
num_words = len(parts)
print(num_words)
# Output: 4
# Question 2
# Count the tab-delimited fields in a record line.
# Splitting on '\t' explicitly keeps the two-word species name
# 'Python regius' together as a single field.
line2 = 'QR57613\t1.3\tSerpentes\tPythonidae\tPython regius'
fields = line2.split('\t')
num_fields = len(fields)
print(num_fields)
# Output: 5
# The separator given to split() may be longer than one character.
line3 = 'ABC^^DEF^^GHI'
print(line3.split('^^'))
# Output: ['ABC', 'DEF', 'GHI']
# Question 3
# Sample FINDHOM report: two preamble lines, one tab-delimited header row,
# then one tab-delimited row per search hit.
findhom_result = """#FINDHOM v 1.2:
Search results:
Query\tMatch fraction\tScore\tSubject
SMPL001\t0.7\t12331\tAQ10213 Phlebotomus perniciosus
SMPL003\t0.5\t6032\tBZ102363 Phlebotomus papatasi
SMPL004\t0.8\t13123\tRD178237 Sergentomyia dubia
SMPL007\t0.6\t10610\tBQ187981 Phlebotomus papatasi"""

# Bare expression: in a notebook this echoes the repr, which shows the
# newline and tab escapes literally.
findhom_result
# Output: '#FINDHOM v 1.2:\nSearch results:\nQuery\tMatch fraction\tScore\tSubject\nSMPL001\t0.7\t12331\tAQ10213 Phlebotomus perniciosus\nSMPL003\t0.5\t6032\tBZ102363 Phlebotomus papatasi\nSMPL004\t0.8\t13123\tRD178237 Sergentomyia dubia\nSMPL007\t0.6\t10610\tBQ187981 Phlebotomus papatasi'

lines = findhom_result.split('\n')  # split on newline
for line in lines:
    print("LINE:", line)
# Output:
# LINE: #FINDHOM v 1.2:
# LINE: Search results:
# LINE: Query Match fraction Score Subject
# LINE: SMPL001 0.7 12331 AQ10213 Phlebotomus perniciosus
# LINE: SMPL003 0.5 6032 BZ102363 Phlebotomus papatasi
# LINE: SMPL004 0.8 13123 RD178237 Sergentomyia dubia
# LINE: SMPL007 0.6 10610 BQ187981 Phlebotomus papatasi

# A negative index counts from the end: -1 is the last line.
# The notebook echo shows the repr (escapes visible) ...
lines[-1]
# Output: 'SMPL007\t0.6\t10610\tBQ187981 Phlebotomus papatasi'

# ... whereas print() writes the string itself, so the tabs are rendered.
print(lines[-1])
# Output: SMPL007 0.6 10610 BQ187981 Phlebotomus papatasi
# Question 4
# Prefix test with str.startswith() versus substring test with `in`.
string1 = 'Mountain Goat'
string2 = 'Field Goat'
print(string1.startswith('Mountain'), 'Mountain')
print(string2.startswith('Mountain'), 'Mountain')
# there is also .endswith()
# is the word Mountain in string1 ?
got_mountain = 'Mountain' in string1
print(got_mountain)
# Output:
# True Mountain
# False Mountain
# True
total = 0
# convert findhom_results into lines
lines = findhom_result.__A____
# search through all of the lines
___B____
# check if the line starts with 'SMPL'
if __C___
total += 1
print("total count of 'SMPL'", total)
Cell In[10], line 7
if __C___
^
IndentationError: unexpected indent
# Count the report lines whose text begins with the sample prefix 'SMPL',
# echoing every line as it is examined.
total = 0
# convert findhom_result into lines
lines = findhom_result.split('\n')
# search through all of the lines
for line in lines:
    print("LINE:", line)
    # check if the line starts with 'SMPL'
    if line.startswith('SMPL'):
        total += 1
print("total count of 'SMPL'", total)
# Output:
# LINE: #FINDHOM v 1.2:
# LINE: Search results:
# LINE: Query Match fraction Score Subject
# LINE: SMPL001 0.7 12331 AQ10213 Phlebotomus perniciosus
# LINE: SMPL003 0.5 6032 BZ102363 Phlebotomus papatasi
# LINE: SMPL004 0.8 13123 RD178237 Sergentomyia dubia
# LINE: SMPL007 0.6 10610 BQ187981 Phlebotomus papatasi
# total count of 'SMPL' 4
# Question 5
# version 1: count the result rows by counting every line that contains a tab.
result_count = 0
lines = findhom_result.split('\n')
for line in lines:
    if '\t' in line:
        result_count += 1
# The column-header row is tab-delimited too, so take it back off the count.
result_count = result_count - 1
print(result_count)
# Output: 4
# version 2
result_count = 0
lines = findhom_result.split('\n')
for line in lines:
    fields = line.split('\t')
    # count all of the lines that have 4 tab separated fields
    if len(fields) == 4:
        result_count += 1
# The column-header row also has 4 fields, so take it back off the count.
result_count = result_count - 1
print(result_count)
# Output: 4
# solve it using a "state machine" with two states: header found and header not found
count = 0
header_found = False
lines = findhom_result.split('\n')
for line in lines:
    if line.startswith("Query"):
        header_found = True
        continue  # skip the header line itself
    # Every line after the header row is counted as a result.
    if header_found:
        count += 1
print(count)
# Output: 4
# solve it using a "state machine" with two states: header found and header not found
# Variant without `continue`: the flag is tested BEFORE it is updated, so on
# the header line itself the flag is still False and that line is not counted.
count = 0
header_found = False
lines = findhom_result.split('\n')
for line in lines:
    if header_found:
        count += 1
    if line.startswith("Query"):
        header_found = True
print(count)
# Output: 4