/android/import-contacts

To get this branch, use:
bzr branch http://bzr.ed.am/android/import-contacts

« back to all changes in this revision

Viewing changes to src/org/waxworlds/edam/importcontacts/VCFImporter.java

  • Committer: edam
  • Date: 2011-03-19 20:33:09 UTC
  • Revision ID: edam@waxworlds.org-20110319203309-5dzfyqrxwk94jtin
- formatting: removed some double-indents on overrunning lines
- updated TODO and NEWS
- rewrote central logic of parser so it makes more sense, looks nicer and has a small optimisation (getting name and params from line only when necessary)
- optimised away unnecessary multiple conversions of lines to US-ASCII
- rewrote line extraction from vcards so that we can look ahead for v3 folded lines
- added support for v3 folded lines

Show diffs side-by-side

added added

removed removed

1
1
/*
2
2
 * VCFImporter.java
3
3
 *
4
 
 * Copyright (C) 2009 to 2011 Tim Marston <edam@waxworlds.org>
 
4
 * Copyright (C) 2009 Tim Marston <edam@waxworlds.org>
5
5
 *
6
6
 * This file is part of the Import Contacts program (hereafter referred
7
7
 * to as "this program"). For more information, see
32
32
import java.io.IOException;
33
33
import java.io.UnsupportedEncodingException;
34
34
import java.nio.ByteBuffer;
35
 
import java.util.ArrayList;
36
35
import java.util.Arrays;
37
 
import java.util.HashMap;
38
36
import java.util.HashSet;
39
37
import java.util.Iterator;
40
38
import java.util.List;
41
 
import java.util.NoSuchElementException;
42
39
import java.util.Set;
43
40
import java.util.Vector;
44
41
import java.util.regex.Matcher;
45
42
import java.util.regex.Pattern;
 
43
import java.util.NoSuchElementException;
 
44
import java.lang.UnsupportedOperationException;
46
45
 
47
46
import android.content.SharedPreferences;
48
47
import android.provider.Contacts;
50
49
 
51
50
public class VCFImporter extends Importer
52
51
{
53
 
        private int _vcard_count = 0;
 
52
        private int _vCardCount = 0;
54
53
        private int _progress = 0;
55
54
 
56
55
        public VCFImporter( Doit doit )
110
109
                        countVCardFile( files[ i ] );
111
110
                        setTmpProgress( i );
112
111
                }
113
 
                setProgressMax( _vcard_count ); // will also update tmp progress
 
112
                setProgressMax( _vCardCount );  // will also update tmp progress
114
113
 
115
114
                // import them
116
115
                setProgress( 0 );
128
127
 
129
128
                        // read
130
129
                        String line;
131
 
                        boolean in_vcard = false;
 
130
                        boolean inVCard = false;
132
131
                        while( ( line = reader.readLine() ) != null )
133
132
                        {
134
 
                                if( !in_vcard ) {
 
133
                                if( !inVCard ) {
135
134
                                        // look for vcard beginning
136
135
                                        if( line.matches( "^BEGIN:VCARD" ) ) {
137
 
                                                in_vcard = true;
138
 
                                                _vcard_count++;
 
136
                                                inVCard = true;
 
137
                                                _vCardCount++;
139
138
                                        }
140
139
                                }
141
140
                                else if( line.matches( "^END:VCARD" ) )
142
 
                                        in_vcard = false;
 
141
                                        inVCard = false;
143
142
                        }
144
143
 
145
144
                }
185
184
                throws AbortImportException
186
185
        {
187
186
                // go through lines
188
 
                VCard vcard = null;
 
187
                VCard vCard = null;
189
188
                ContentLineIterator cli = new ContentLineIterator( content );
190
189
                while( cli.hasNext() )
191
190
                {
202
201
                                line = "";
203
202
                        }
204
203
 
205
 
                        if( vcard == null ) {
 
204
                        if( vCard == null ) {
206
205
                                // look for vcard beginning
207
206
                                if( line.matches( "^BEGIN:VCARD" ) ) {
208
207
                                        setProgress( ++_progress );
209
 
                                        vcard = new VCard();
 
208
                                        vCard = new VCard();
210
209
                                }
211
210
                        }
212
211
                        else {
215
214
                                {
216
215
                                        // store vcard and do away with it
217
216
                                        try {
218
 
                                                vcard.finaliseParsing();
219
 
                                                importContact( vcard );
 
217
                                                vCard.finaliseParsing();
 
218
                                                importContact( vCard );
220
219
                                        }
221
220
                                        catch( VCard.ParseException e ) {
222
221
                                                skipContact();
231
230
                                                skipContact();
232
231
                                                // do nothing
233
232
                                        }
234
 
                                        vcard = null;
 
233
                                        vCard = null;
235
234
                                }
236
235
                                else
237
236
                                {
238
237
                                        // try giving the line to the vcard
239
238
                                        try {
240
 
                                                vcard.parseLine( buffer, line,
 
239
                                                vCard.parseLine( buffer, line,
241
240
                                                        cli.doesNextLineLookFolded() );
242
241
                                        }
243
242
                                        catch( VCard.ParseException e ) {
252
251
                                                // although we're continuing, we still need to abort
253
252
                                                // this vCard. Further lines will be ignored until we
254
253
                                                // get to another BEGIN:VCARD line.
255
 
                                                vcard = null;
 
254
                                                vCard = null;
256
255
                                        }
257
256
                                        catch( VCard.SkipContactException e ) {
258
257
                                                skipContact();
259
258
                                                // abort this vCard. Further lines will be ignored until
260
259
                                                // we get to another BEGIN:VCARD line.
261
 
                                                vcard = null;
 
260
                                                vCard = null;
262
261
                                        }
263
262
                                }
264
263
                        }
331
330
        private class VCard extends ContactData
332
331
        {
333
332
                private final static int NAMELEVEL_NONE = 0;
334
 
                private final static int NAMELEVEL_FN = 1;
335
 
                private final static int NAMELEVEL_N = 2;
336
 
 
337
 
                private final static int MULTILINE_NONE = 0;
338
 
                private final static int MULTILINE_ENCODED = 1; // v2.1 quoted-printable
339
 
                private final static int MULTILINE_ESCAPED = 2; // v2.1 \\CRLF
340
 
                private final static int MULTILINE_FOLDED = 3;  // v3.0 folding
 
333
                private final static int NAMELEVEL_ORG = 1;
 
334
                private final static int NAMELEVEL_FN = 2;
 
335
                private final static int NAMELEVEL_N = 3;
341
336
 
342
337
                private String _version = null;
343
338
                private Vector< ByteBuffer > _buffers = null;
344
339
                private int _name_level = NAMELEVEL_NONE;
345
 
                private int _parser_multiline_state = MULTILINE_NONE;
 
340
                private boolean _parser_in_encoded_multiline = false;
 
341
                private boolean _parser_in_folded_multiline = false;
346
342
                private String _parser_current_name_and_params = null;
347
343
                private String _parser_buffered_value_so_far = "";
348
 
                private String _cached_organisation = null;
349
 
                private String _cached_title = null;
350
344
 
351
345
                protected class UnencodeResult
352
346
                {
475
469
                                String name_and_params;
476
470
                                int pos;
477
471
 
478
 
                                if( _parser_multiline_state != MULTILINE_NONE )
 
472
                                if( _parser_in_encoded_multiline ||
 
473
                                        _parser_in_folded_multiline )
479
474
                                {
480
475
                                        // if we're currently in a multi-line value, use the stored
481
476
                                        // property name and parameters
482
477
                                        name_and_params = _parser_current_name_and_params;
483
478
 
484
 
                                        // skip some initial line characters, depending on the type
485
 
                                        // of multi-line we're handling
486
479
                                        pos = buffer.position();
487
 
                                        switch( _parser_multiline_state )
488
 
                                        {
489
 
                                        case MULTILINE_FOLDED:
 
480
 
 
481
                                        // for folded multi-lines, skip the single space at the
 
482
                                        // start of the next line
 
483
                                        if( _parser_in_folded_multiline )
490
484
                                                pos++;
491
 
                                                break;
492
 
                                        case MULTILINE_ENCODED:
 
485
 
 
486
                                        // else, this must be an encoded multi-line, so skip any
 
487
                                        // whitespace we find at the start of the next line
 
488
                                        else
493
489
                                                while( pos < buffer.limit() && (
494
490
                                                        buffer.get( pos ) == ' ' ||
495
491
                                                        buffer.get( pos ) == '\t' ) )
496
492
                                                {
497
493
                                                        pos++;
498
494
                                                }
499
 
                                                break;
500
 
                                        default:
501
 
                                                // do nothing
502
 
                                        }
503
 
 
504
 
                                        // take us out of multi-line so that we can re-detect that
505
 
                                        // this line is a multi-line or not
506
 
                                        _parser_multiline_state = MULTILINE_NONE;
507
495
                                }
508
496
                                else
509
497
                                {
564
552
//                                      unencoding_result = unencodeBase64( props[ 1 ], charset );
565
553
                                if( unencoding_result != null ) {
566
554
                                        value = unencoding_result.getBuffer();
567
 
                                        if( unencoding_result.isAnotherLineRequired() )
568
 
                                                _parser_multiline_state = MULTILINE_ENCODED;
 
555
                                        _parser_in_encoded_multiline =
 
556
                                                unencoding_result.isAnotherLineRequired();
569
557
                                }
570
558
 
571
559
                                // convert 8-bit ASCII charset to US-ASCII
583
571
                                        throw new ParseException( R.string.error_vcf_charset );
584
572
                                }
585
573
 
586
 
                                // for some entries that have semicolon-separated value parts,
587
 
                                // check to see if the value ends in an escape character, which
588
 
                                // indicates that we have a multi-line value
589
 
                                if( ( name_param_parts[ 0 ].equals( "N" ) ||
590
 
                                        name_param_parts[ 0 ].equals( "ORG" ) ||
591
 
                                        name_param_parts[ 0 ].equals( "ADR" ) ) &&
592
 
                                        doesStringEndInAnEscapeChar( string_value ) )
593
 
                                {
594
 
                                        _parser_multiline_state = MULTILINE_ESCAPED;
595
 
                                        string_value = string_value.substring( 0,
596
 
                                                string_value.length() - 1 );
597
 
                                }
598
 
 
599
574
                                // now we know whether we're in an encoding multi-line,
600
575
                                // determine if we're in a v3 folded multi-line or not
601
 
                                if( _parser_multiline_state == MULTILINE_NONE &&
602
 
                                        _version.equals( "3.0" ) && next_line_looks_folded )
 
576
                                _parser_in_folded_multiline = !_parser_in_encoded_multiline &&
 
577
                                        _version.equals( "3.0" ) && next_line_looks_folded;
 
578
 
 
579
                                // handle multi-line requests
 
580
                                if( _parser_in_encoded_multiline ||
 
581
                                        _parser_in_folded_multiline )
603
582
                                {
604
 
                                        _parser_multiline_state = MULTILINE_FOLDED;
605
 
                                }
606
 
 
607
 
                                // handle multi-lines by buffering them and parsing them when we
608
 
                                // are processing the last line in a multi-line sequence
609
 
                                if( _parser_multiline_state != MULTILINE_NONE ) {
610
583
                                        _parser_buffered_value_so_far += string_value;
611
584
                                        return;
612
585
                                }
 
586
 
 
587
                                // add on buffered multi-line content
613
588
                                String complete_value =
614
 
                                        ( _parser_buffered_value_so_far + string_value ).trim();
 
589
                                        _parser_buffered_value_so_far + string_value;
615
590
 
616
591
                                // ignore empty values
617
592
                                if( complete_value.length() < 1 ) return;
623
598
                                        parseFN( name_param_parts, complete_value );
624
599
                                else if( name_param_parts[ 0 ].equals( "ORG" ) )
625
600
                                        parseORG( name_param_parts, complete_value );
626
 
                                else if( name_param_parts[ 0 ].equals( "TITLE" ) )
627
 
                                        parseTITLE( name_param_parts, complete_value );
628
601
                                else if( name_param_parts[ 0 ].equals( "TEL" ) )
629
602
                                        parseTEL( name_param_parts, complete_value );
630
603
                                else if( name_param_parts[ 0 ].equals( "EMAIL" ) )
631
604
                                        parseEMAIL( name_param_parts, complete_value );
632
 
                                else if( name_param_parts[ 0 ].equals( "ADR" ) )
633
 
                                        parseADR( name_param_parts, complete_value );
634
 
                        }
635
 
                }
636
 
 
637
 
                private boolean doesStringEndInAnEscapeChar( String string )
638
 
                {
639
 
                        // count the number of backslashes at the end of the string
640
 
                        int count = 0;
641
 
                        for( int a = string.length() - 1; a >= 0; a-- )
642
 
                                if( string.charAt( a ) == '\\' )
643
 
                                        count++;
644
 
                                else
645
 
                                        break;
646
 
 
647
 
                        // if there are an even number of backslashes then the final one
648
 
                        // doesn't count
649
 
                        return ( count & 1 ) == 1;
650
 
                }
651
 
 
652
 
                private String[] splitValueBySemicolon( String value )
653
 
                {
654
 
                        // split string in to parts by semicolon
655
 
                        ArrayList< String > parts = new ArrayList< String >(
656
 
                                Arrays.asList( value.split(  ";" ) ) );
657
 
 
658
 
                        // go through parts
659
 
                        for( int a = 0; a < parts.size(); a++ )
660
 
                        {
661
 
                                String str = parts.get( a );
662
 
 
663
 
                                // look for parts that end in an escape character, but ignore
664
 
                                // the final part. We've already detected escape chars at the
665
 
                                // end of the final part in parseLine() and handled multi-lines
666
 
                                // accordingly.
667
 
                                if( a < parts.size() - 1 &&
668
 
                                        doesStringEndInAnEscapeChar( str ) )
669
 
                                {
670
 
                                        // join the next part to this part and remove the next part
671
 
                                        parts.set( a, str.substring( 0, str.length() - 1 ) +
672
 
                                                ';' + parts.get( a + 1 ) );
673
 
                                        parts.remove( a + 1 );
674
 
 
675
 
                                        // re-visit this part
676
 
                                        a--;
677
 
                                        continue;
678
 
                                }
679
 
 
680
 
                                // trim and replace string
681
 
                                str = str.trim();
682
 
                                parts.set( a, str );
683
 
                        }
684
 
 
685
 
                        String[] ret = new String[ parts.size() ];
686
 
                        return parts.toArray( ret );
 
605
                        }
687
606
                }
688
607
 
689
608
                private void parseN( String[] params, String value )
 
609
                        throws ParseException, SkipContactException,
 
610
                        AbortImportException
690
611
                {
691
612
                        // already got a better name?
692
613
                        if( _name_level >= NAMELEVEL_N ) return;
693
614
 
694
615
                        // get name parts
695
 
                        String[] name_parts = splitValueBySemicolon( value );
 
616
                        String[] name_parts = value.split( ";" );
 
617
                        for( int i = 0; i < name_parts.length; i++ )
 
618
                                name_parts[ i ] = name_parts[ i ].trim();
696
619
 
697
620
                        // build name
698
621
                        value = "";
704
627
                        // set name
705
628
                        setName( value );
706
629
                        _name_level = NAMELEVEL_N;
 
630
 
 
631
                        // check now to see if we need to import this contact (to avoid
 
632
                        // parsing the rest of the vCard unnecessarily)
 
633
                        if( !isImportRequired( getName() ) )
 
634
                                throw new SkipContactException();
707
635
                }
708
636
 
709
637
                private void parseFN( String[] params, String value )
 
638
                        throws ParseException, SkipContactException
710
639
                {
711
640
                        // already got a better name?
712
641
                        if( _name_level >= NAMELEVEL_FN ) return;
717
646
                }
718
647
 
719
648
                private void parseORG( String[] params, String value )
 
649
                        throws ParseException, SkipContactException
720
650
                {
 
651
                        // already got a better name?
 
652
                        if( _name_level >= NAMELEVEL_ORG ) return;
 
653
 
721
654
                        // get org parts
722
 
                        String[] org_parts = splitValueBySemicolon( value );
723
 
                        if( org_parts == null || org_parts.length < 1 ) return;
724
 
 
725
 
                        // build organisation name
726
 
                        StringBuilder builder = new StringBuilder(
727
 
                                String.valueOf( org_parts[ 0 ] ) );
728
 
                        for( int a = 1; a < org_parts.length; a++ )
729
 
                                builder.append( ", " ).append( org_parts[ a ] );
730
 
                        String organisation = builder.toString();
731
 
 
732
 
                        // set organisation name (using a title we've previously found)
733
 
                        addOrganisation( organisation, _cached_title, true );
734
 
 
735
 
                        // if we've not previously found a title, store this organisation
736
 
                        // name (we'll need it when we find a title to update the
737
 
                        // organisation, by name), else if we *have* previously found a
738
 
                        // title, clear it (since we just used it)
739
 
                        if( _cached_title == null )
740
 
                                _cached_organisation = organisation;
741
 
                        else
742
 
                                _cached_title = null;
743
 
                }
744
 
 
745
 
                private void parseTITLE( String[] params, String value )
746
 
                {
747
 
                        // if we previously had an organisation, look it up and append this
748
 
                        // title to it
749
 
                        if( _cached_organisation != null && hasOrganisations() ) {
750
 
                                HashMap< String, ExtraDetail > datas = getOrganisations();
751
 
                                ExtraDetail detail = datas.get( _cached_organisation );
752
 
                                if( detail != null )
753
 
                                        detail.setExtra( value );
754
 
                        }
755
 
 
756
 
                        // same as when handling organisation, if we've not previously found
757
 
                        // an organisation we store this title, else we clear it (since we
758
 
                        // just appended this title to it)
759
 
                        if( _cached_organisation == null )
760
 
                                _cached_title = value;
761
 
                        else
762
 
                                _cached_organisation = null;
 
655
                        String[] org_parts = value.split( ";" );
 
656
                        for( int i = 0; i < org_parts.length; i++ )
 
657
                                org_parts[ i ] = org_parts[ i ].trim();
 
658
 
 
659
                        // build name
 
660
                        if( org_parts.length > 1 && org_parts[ 0 ].length() == 0 )
 
661
                                value = org_parts[ 1 ];
 
662
                        else
 
663
                                value = org_parts[ 0 ];
 
664
 
 
665
                        // set name
 
666
                        setName( value );
 
667
                        _name_level = NAMELEVEL_ORG;
763
668
                }
764
669
 
765
670
                private void parseTEL( String[] params, String value )
 
671
                        throws ParseException
766
672
                {
767
673
                        if( value.length() == 0 ) return;
768
674
 
771
677
                                "PAGER", "BBS", "MODEM", "CAR", "ISDN", "VIDEO" ) );
772
678
 
773
679
                        // here's the logic...
774
 
                        boolean is_preferred = types.contains( "PREF" );
775
 
                        int type;
 
680
                        boolean preferred = types.contains( "PREF" );
 
681
                        int type = PhonesColumns.TYPE_MOBILE;
 
682
                        if( types.contains( "VOICE" ) )
 
683
                                if( types.contains( "WORK" ) )
 
684
                                        type = PhonesColumns.TYPE_WORK;
 
685
                                else
 
686
                                        type = PhonesColumns.TYPE_HOME;
 
687
                        else if( types.contains( "CELL" ) || types.contains( "VIDEO" ) )
 
688
                                type = PhonesColumns.TYPE_MOBILE;
776
689
                        if( types.contains( "FAX" ) )
777
690
                                if( types.contains( "HOME" ) )
778
691
                                        type = PhonesColumns.TYPE_FAX_HOME;
779
692
                                else
780
693
                                        type = PhonesColumns.TYPE_FAX_WORK;
781
 
                        else if( types.contains( "CELL" ) || types.contains( "VIDEO" ) )
782
 
                                type = PhonesColumns.TYPE_MOBILE;
783
 
                        else if( types.contains( "PAGER" ) )
 
694
                        if( types.contains( "PAGER" ) )
784
695
                                type = PhonesColumns.TYPE_PAGER;
785
 
                        else if( types.contains( "WORK" ) )
786
 
                                type = PhonesColumns.TYPE_WORK;
787
 
                        else
788
 
                                type = PhonesColumns.TYPE_HOME;
789
696
 
790
697
                        // add phone number
791
 
                        addNumber( value, type, is_preferred );
 
698
                        addPhone( value, type, preferred );
792
699
                }
793
700
 
794
701
                public void parseEMAIL( String[] params, String value )
 
702
                        throws ParseException
795
703
                {
796
704
                        if( value.length() == 0 ) return;
797
705
 
798
706
                        Set< String > types = extractTypes( params, Arrays.asList(
799
707
                                "PREF", "WORK", "HOME", "INTERNET" ) );
800
708
 
801
 
                        // add email address
802
 
                        boolean is_preferred = types.contains( "PREF" );
803
 
                        int type;
804
 
                        if( types.contains( "WORK" ) )
805
 
                                type = Contacts.ContactMethods.TYPE_WORK;
806
 
                        else
807
 
                                type = Contacts.ContactMethods.TYPE_HOME;
808
 
 
809
 
                        addEmail( value, type, is_preferred );
810
 
                }
811
 
 
812
 
                private void parseADR( String[] params, String value )
813
 
                {
814
 
                        // get address parts
815
 
                        String[] adr_parts = splitValueBySemicolon( value );
816
 
 
817
 
                        // build address
818
 
                        value = "";
819
 
                        for( int a = 0; a < adr_parts.length; a++ ) {
820
 
                                if( value.length() > 0 ) value += "\n";
821
 
                                value += adr_parts[ a ].trim();
822
 
                        }
823
 
 
824
 
                        Set< String > types = extractTypes( params, Arrays.asList(
825
 
                                "PREF", "WORK", "HOME", "INTERNET" ) );
826
 
 
827
 
                        // add address
828
 
                        int type;
829
 
                        if( types.contains( "WORK" ) )
830
 
                                type = Contacts.ContactMethods.TYPE_WORK;
831
 
                        else
832
 
                                type = Contacts.ContactMethods.TYPE_HOME;
833
 
 
834
 
                        addAddress( value, type );
 
709
                        // here's the logic...
 
710
                        boolean preferred = types.contains( "PREF" );
 
711
                        if( types.contains( "WORK" ) )
 
712
                                addEmail( value, Contacts.ContactMethods.TYPE_WORK, preferred );
 
713
                        else
 
714
                                addEmail( value, Contacts.ContactMethods.TYPE_HOME, preferred );
835
715
                }
836
716
 
837
717
                public void finaliseParsing()
842
722
                        if( _version == null && _buffers != null )
843
723
                                throw new ParseException( R.string.error_vcf_malformed );
844
724
 
845
 
                        // check if we should import this contact
846
 
                        try {
847
 
                                if( !isImportRequired( this ) )
848
 
                                        throw new SkipContactException();
849
 
                        }
850
 
                        catch( ContactNeedsMoreInfoException e ) {
851
 
                                throw new ParseException( R.string.error_vcf_notenoughinfo );
852
 
                        }
 
725
                        //  missing name properties?
 
726
                        if( _name_level == NAMELEVEL_NONE )
 
727
                                throw new ParseException( R.string.error_vcf_noname );
 
728
 
 
729
                        // check if we should import this one? If we've already got an 'N'-
 
730
                        // type name, this will already have been done by parseN() so we
 
731
                        // mustn't do this here (or it could prompt twice!)
 
732
                        if( _name_level < NAMELEVEL_N && !isImportRequired( getName() ) )
 
733
                                throw new SkipContactException();
853
734
                }
854
735
 
855
736
                private String checkParam( String[] params, String name )