Commit 8fa5cde1 authored by Roshan Rabinarayan

added working version of q2

parent 5b251593
......@@ -2,9 +2,9 @@
# NOTE(review): these lines come from a rendered diff hunk whose +/- markers
# are missing, so lines from before and after the commit may be interleaved
# here - confirm against the real script before running this verbatim.
#
# Build a scratch file `temp` from the input file named by $1: every run of
# tabs/spaces becomes a newline, and a newline is inserted before each
# occurrence of "https" so that anything starting with https begins a line.
# Assumes GNU sed (\+, \n in the replacement and \t inside brackets are
# GNU extensions).
sed -e 's/[\t ]\+/\n/g' -e 's/https/\nhttps/g' $1 >temp
# Save to `url` every line of `temp` that contains
# https://www.cse.iitb.ac.in/~ followed by at least one non-space character.
# With -n and /.../p the whole matching line is printed, not just the match.
sed -n '/https\:\/\/www\.cse\.iitb\.ac\.in[/][~][^ ]\+/p' temp >url
# Remove URLs from the input file in place (sed -i rewrites $1 itself).
# Each of the following is replaced by a single space:
#   1. http:// or https:// followed by non-space characters;
#   2. a run of non-space characters followed by a dot and one of
#      com/in/org/net/co/us/edu/gov, plus at most one more non-space char.
sed -i -e 's/http[s]\?\:\/\/[^ ]\+/ /g' -e 's/[^ ]\+[.]\(com\|in\|org\|net\|co\|us\|edu\|gov\)[^ ]\?/ /g' $1
# Collapse every run of tabs/spaces in the input file into one space.
sed -i -e 's/[\t ]\+/ /g' $1
# Write `url` again (">" truncates, so any earlier contents are replaced).
# A line is kept when it contains https://www.cse.iitb.ac.in/~ followed by
# at least five letters a-z; `http[s]` makes the "s" mandatory. The pattern
# is not anchored after the name, so the upper bound 15 in \{5,15\} does not
# reject longer names.
sed -n '/http[s]\:\/\/www\.cse\.iitb\.ac\.in[/][~][a-z]\{5,15\}/p' temp >url
# Delete the scratch file created above.
rm temp
\ No newline at end of file
# Clean up the input file named by $1 in place (GNU sed -i).
# NOTE(review): the next command is an incomplete sed expression ('s/' has
# no pattern or replacement) and names no file - it looks like a leftover
# and is expected to fail; confirm whether it should be removed.
sed -i -e 's/'
# Five substitutions applied in order to each line of $1:
#   1. collapse every run of tabs/spaces into a single space;
#   2. insert a space before every ")" - presumably so that a ")" directly
#      after a URL is not swallowed by the URL patterns below (TODO confirm);
#   3. replace http:// or https:// plus following non-space chars by a space;
#   4. replace " <name>.<suffix>[/]<rest> " by a single space, where <name>
#      and <rest> are one or more characters other than space, "^" and "@",
#      and <suffix> is one of com/in/org/net/co/us/edu/gov; at least one
#      character must follow the suffix, and the match needs a leading and
#      a trailing space;
#   5. remove the single space before ")" again, undoing step 2.
sed -i -e 's/[\t ]\+/ /g' -e 's/[)]\{1\}/ )/g' -e 's/http[s]\?\:\/\/[^ ]\+/ /g' -e 's/[ ][^ ^@]\+[.]\{1\}\(com\|in\|org\|net\|co\|us\|edu\|gov\)\{1\}[/]\{0,1\}[^ ^@]\+[ ]\+/ /g' -e 's/[ ]\{1\}[)]\{1\}/)/g' $1
# Collapse the runs of blanks left by the removals into single spaces.
sed -i -e 's/[\t ]\+/ /g' $1
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment